Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate out _convert_datetime_to_tsobject #17715

Merged
merged 7 commits into from
Oct 2, 2017
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 80 additions & 50 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ class Timestamp(_Timestamp):
# reconstruct & check bounds
ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
dts.sec, dts.us, tzinfo=_tzinfo)
ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0)
ts = convert_datetime_to_tsobject(ts_input, _tzinfo)
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
_check_dts_bounds(&dts)
Expand Down Expand Up @@ -1475,52 +1475,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
obj.value = ts
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
elif PyDateTime_Check(ts):
if tz is not None:
# sort of a temporary hack
if ts.tzinfo is not None:
if (hasattr(tz, 'normalize') and
hasattr(ts.tzinfo, '_utcoffset')):
ts = tz.normalize(ts)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else: #tzoffset
try:
tz = ts.astimezone(tz).tzinfo
except:
pass
obj.value = _pydatetime_to_dts(ts, &obj.dts)
ts_offset = get_utcoffset(ts.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(ts_offset)
tz_offset = get_utcoffset(tz, ts)
obj.value += _delta_to_nanoseconds(tz_offset)
pandas_datetime_to_datetimestruct(obj.value,
PANDAS_FR_ns, &obj.dts)
obj.tzinfo = tz
elif not is_utc(tz):
ts = _localize_pydatetime(ts, tz)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else:
# UTC
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = pytz.utc
else:
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo

if obj.tzinfo is not None and not is_utc(obj.tzinfo):
offset = get_utcoffset(obj.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(offset)

if is_timestamp(ts):
obj.value += ts.nanosecond
obj.dts.ps = ts.nanosecond * 1000
_check_dts_bounds(&obj.dts)
return obj
return convert_datetime_to_tsobject(ts, tz)
elif PyDate_Check(ts):
# Keep the converter same as PyDateTime's
ts = datetime.combine(ts, datetime_time())
return convert_to_tsobject(ts, tz, None, 0, 0)
return convert_datetime_to_tsobject(ts, tz)
elif getattr(ts, '_typ', None) == 'period':
raise ValueError(
"Cannot convert Period to Timestamp "
Expand All @@ -1538,6 +1497,68 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
return obj


cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
int32_t nanos=0):
"""
Extract datetime and int64 from any of:
- python datetime object
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can u put a proper doc string

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is adapted from the convert_to_tsobject docstring.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

take the opportunity to improve it

- another timestamp object
"""
cdef:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls pls always add doc-strings

_TSObject obj = _TSObject()

if tz is not None:
tz = maybe_get_tz(tz)

# sort of a temporary hack
if ts.tzinfo is not None:
if (hasattr(tz, 'normalize') and
hasattr(ts.tzinfo, '_utcoffset')):
ts = tz.normalize(ts)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else:
# tzoffset
try:
tz = ts.astimezone(tz).tzinfo
except:
pass
obj.value = _pydatetime_to_dts(ts, &obj.dts)
ts_offset = get_utcoffset(ts.tzinfo, ts)
obj.value -= int(ts_offset.total_seconds() * 1e9)
tz_offset = get_utcoffset(tz, ts)
obj.value += int(tz_offset.total_seconds() * 1e9)
pandas_datetime_to_datetimestruct(obj.value,
PANDAS_FR_ns, &obj.dts)
obj.tzinfo = tz
elif not is_utc(tz):
ts = _localize_pydatetime(ts, tz)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else:
# UTC
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = pytz.utc
else:
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo

if obj.tzinfo is not None and not is_utc(obj.tzinfo):
offset = get_utcoffset(obj.tzinfo, ts)
obj.value -= int(offset.total_seconds() * 1e9)

if is_timestamp(ts):
obj.value += ts.nanosecond
obj.dts.ps = ts.nanosecond * 1000

if nanos:
obj.value += nanos
obj.dts.ps = nanos * 1000

_check_dts_bounds(&obj.dts)
return obj


cpdef convert_str_to_tsobject(object ts, object tz, object unit,
dayfirst=False, yearfirst=False):
""" ts must be a string """
Expand All @@ -1558,11 +1579,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
elif ts == 'now':
# Issue 9000, we short-circuit rather than going
# into np_datetime_strings which returns utc
ts = Timestamp.now(tz)
ts = datetime.now(tz)
elif ts == 'today':
# Issue 9000, we short-circuit rather than going
# into np_datetime_strings which returns a normalized datetime
ts = Timestamp.today(tz)
ts = datetime.now(tz)
# equiv: datetime.today().replace(tzinfo=tz)
else:
try:
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
Expand All @@ -1577,7 +1599,15 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
return obj
else:
# Keep the converter same as PyDateTime's
ts = Timestamp(obj.value, tz=obj.tzinfo)
obj = convert_to_tsobject(obj.value, obj.tzinfo,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok so this is getting cumbersome. why don't you make a separate routine to do these 3 (and I guess go back to consrructing datetime). But why does the

ts = Timestamp(obj.value, tz=obj.tzinfo) not work here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why don't you make a separate routine to do these 3 (and I guess go back to consrructing datetime)

OK

But why does the ts = Timestamp(obj.value, tz=obj.tzinfo) not work here?

It works, but the goal is to disentangle Timestamp from _TSObject logic. Or more specifically, to make the dependency go in one direction.

None, 0, 0)
dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use the existing routine to do this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you referring to create_datetime_from_ts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create_datetime_from_ts is a compatibility func specifically for ints_to_pydatetime (you can tell because it has several unused arguments that are retained to match the signature of create_timestamp_from_ts).

Replacing a one-line call to datetime(...) with a call to a one-line func that does exactly the same thing (but with less clarity because of the unused args) doesn't achieve much, with the possible exception of function call overhead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know I wrote it!

yet you are repeating code, I would prefer not to.

with the possible exception of function call overhead.

these take / return objects so the overhead is the same.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine.

obj.dts.hour, obj.dts.min, obj.dts.sec,
obj.dts.us, obj.tzinfo)
obj = convert_datetime_to_tsobject(dtime, tz,
nanos=obj.dts.ps / 1000)
return obj

else:
ts = obj.value
if tz is not None:
Expand Down Expand Up @@ -1726,7 +1756,7 @@ def datetime_to_datetime64(ndarray[object] values):
else:
inferred_tz = get_timezone(val.tzinfo)

_ts = convert_to_tsobject(val, None, None, 0, 0)
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
_check_dts_bounds(&_ts.dts)
else:
Expand Down Expand Up @@ -2046,7 +2076,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
seen_datetime=1
if val.tzinfo is not None:
if utc_convert:
_ts = convert_to_tsobject(val, None, 'ns', 0, 0)
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
try:
_check_dts_bounds(&_ts.dts)
Expand Down Expand Up @@ -2155,7 +2185,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
raise TypeError("invalid string coercion to datetime")

try:
_ts = convert_to_tsobject(py_dt, None, None, 0, 0)
_ts = convert_datetime_to_tsobject(py_dt, None)
iresult[i] = _ts.value
except ValueError:
if is_coerce:
Expand Down