Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1677888, SNOW-1677890: Add support for DataFrame.tz_convert and Series.tz_convert #2399

Merged
merged 2 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
- Added support for `Series.items`.
- Added support for `errors="ignore"` in `pd.to_datetime`.
- Added support for `DataFrame.tz_localize` and `Series.tz_localize`.
- Added support for `DataFrame.tz_convert` and `Series.tz_convert`.

#### Improvements

Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/dataframe_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``truncate`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | | |
| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_localize`` | P | ``axis``, ``level``, ``copy``, | |
| | | ``ambiguous``, ``nonexistent`` | |
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/series_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``truncate`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | | |
| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_localize`` | P | ``axis``, ``level``, ``copy``, | |
| | | ``ambiguous``, ``nonexistent`` | |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18771,6 +18771,53 @@ def compare(

return result

def tz_convert(
    self,
    tz: Optional[Union[str, tzinfo]],
    axis: Union[int, str] = 0,
    level: Optional[Level] = None,
    copy: bool = True,
) -> "SnowflakeQueryCompiler":
    """
    Convert tz-aware axis to target time zone.

    The conversion is applied to the index columns only
    (``include_index=True``, ``include_data=False``); data columns are
    passed through untouched.

    Parameters
    ----------
    tz : str or tzinfo object or None
        Target time zone. Passing None will convert to UTC and remove the timezone information.
    axis : {0 or ‘index’, 1 or ‘columns’}, default 0
        The axis to convert. ``1`` / ``'columns'`` is not yet supported.
    level : int, str, default None
        If axis is a MultiIndex, convert a specific level. Otherwise must be None.
        Any non-None value is not yet supported.
    copy : bool, default True
        Also make a copy of the underlying data. Only ``True`` is supported.

    Returns
    -------
    SnowflakeQueryCompiler
        The result of applying time zone conversion.

    Raises
    ------
    NotImplementedError
        If ``axis`` is ``1`` or ``'columns'``, if ``level`` is not None, or
        if ``copy`` is anything other than ``True``.
    """
    # Reject unsupported arguments before building any query so that the
    # failure is immediate and costs no SQL round trip.
    if axis in (1, "columns"):
        ErrorMessage.not_implemented(
            f"Snowpark pandas 'tz_convert' method doesn't yet support 'axis={axis}'"
        )
    if level is not None:
        ErrorMessage.not_implemented(
            "Snowpark pandas 'tz_convert' method doesn't yet support the 'level' parameter"
        )
    # Identity check on purpose: any value other than the literal True
    # (including None) is treated as unsupported.
    if copy is not True:
        ErrorMessage.not_implemented(
            "Snowpark pandas 'tz_convert' method doesn't support 'copy=False'"
        )

    return SnowflakeQueryCompiler(
        self._modin_frame.apply_snowpark_function_to_columns(
            lambda column: tz_convert_column(column, tz),
            include_data=False,
            include_index=True,
        )
    )

def tz_localize(
self,
tz: Union[str, tzinfo],
Expand Down
50 changes: 48 additions & 2 deletions src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4179,6 +4179,52 @@ def to_timestamp():
Cast to DatetimeIndex of timestamps, at *beginning* of period.
"""

def tz_convert():
    """
    Convert tz-aware axis to target time zone.

    Parameters
    ----------
    tz : str or tzinfo object or None
        Target time zone. Passing None will convert to UTC and remove the timezone information.
    axis : {0 or ‘index’, 1 or ‘columns’}, default 0
        The axis to convert. Snowpark pandas does not yet support
        ``axis=1`` / ``axis='columns'``.
    level : int, str, default None
        If axis is a MultiIndex, convert a specific level. Otherwise must be None.
        Snowpark pandas does not yet support this parameter.
    copy : bool, default True
        Also make a copy of the underlying data. Snowpark pandas does not
        support ``copy=False``.

    Returns
    -------
    Series/DataFrame
        Object with time zone converted axis.

    Raises
    ------
    TypeError
        If the axis is tz-naive.
    NotImplementedError
        If an unsupported ``axis``, ``level`` or ``copy`` value is passed.

    Examples
    --------
    Change to another time zone:

    >>> s = pd.Series(
    ...     [1],
    ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
    ... )
    >>> s.tz_convert('Asia/Shanghai')
    2018-09-15 07:30:00+08:00 1
    Freq: None, dtype: int64

    Pass None to convert to UTC and get a tz-naive index:

    >>> s = pd.Series([1],
    ...               index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
    >>> s.tz_convert(None)
    2018-09-14 23:30:00 1
    Freq: None, dtype: int64
    """

def tz_localize():
"""
Localize tz-naive index of a Series or DataFrame to target time zone.
Expand All @@ -4192,10 +4238,10 @@ def tz_localize():
axis : {0 or ‘index’, 1 or ‘columns’}, default 0
The axis to localize
level : int, str, default None
If axis ia a MultiIndex, localize a specific level. Otherwise must be None.
If axis is a MultiIndex, localize a specific level. Otherwise must be None.
copy : bool, default True
Also make a copy of the underlying data.
ambiguou: ‘infer’, bool-ndarray, ‘NaT’, default ‘raise’
ambiguous: ‘infer’, bool-ndarray, ‘NaT’, default ‘raise’
When clocks moved backward due to DST, ambiguous times may arise. For example in Central European Time (UTC+01), when going from 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the ambiguous parameter dictates how ambiguous times should be handled.
- ‘infer’ will attempt to infer fall dst-transition hours based on order
- bool-ndarray where True signifies a DST time, False designates a non-DST time (note that this flag is only applicable for ambiguous times)
Expand Down
46 changes: 46 additions & 0 deletions src/snowflake/snowpark/modin/plugin/docstrings/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3425,6 +3425,52 @@ def truncate():
Truncate a Series before and after some index value.
"""

def tz_convert():
    """
    Convert tz-aware axis to target time zone.

    Parameters
    ----------
    tz : str or tzinfo object or None
        Target time zone. Passing None will convert to UTC and remove the timezone information.
    axis : {0 or ‘index’, 1 or ‘columns’}, default 0
        The axis to convert. Snowpark pandas does not yet support
        ``axis=1`` / ``axis='columns'``.
    level : int, str, default None
        If axis is a MultiIndex, convert a specific level. Otherwise must be None.
        Snowpark pandas does not yet support this parameter.
    copy : bool, default True
        Also make a copy of the underlying data. Snowpark pandas does not
        support ``copy=False``.

    Returns
    -------
    Series/DataFrame
        Object with time zone converted axis.

    Raises
    ------
    TypeError
        If the axis is tz-naive.
    NotImplementedError
        If an unsupported ``axis``, ``level`` or ``copy`` value is passed.

    Examples
    --------
    Change to another time zone:

    >>> s = pd.Series(
    ...     [1],
    ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
    ... )
    >>> s.tz_convert('Asia/Shanghai')
    2018-09-15 07:30:00+08:00 1
    Freq: None, dtype: int64

    Pass None to convert to UTC and get a tz-naive index:

    >>> s = pd.Series([1],
    ...               index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
    >>> s.tz_convert(None)
    2018-09-14 23:30:00 1
    Freq: None, dtype: int64
    """

def tz_localize():
"""
Localize tz-naive index of a Series or DataFrame to target time zone.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -490,11 +490,6 @@ def truncate(
pass # pragma: no cover


# Placeholder for ``tz_convert`` with an intentionally empty body (excluded
# from coverage).
# NOTE(review): presumably ``register_base_not_implemented`` makes calls to this
# method report "not implemented" — confirm against the decorator's definition.
@register_base_not_implemented()
def tz_convert(self, tz, axis=0, level=None, copy=True):  # noqa: PR01, RT01, D200
    pass  # pragma: no cover


# Placeholder for ``update`` with an intentionally empty body (excluded from
# coverage).
# NOTE(review): presumably ``register_base_not_implemented`` makes calls to this
# method report "not implemented" — confirm against the decorator's definition.
@register_base_not_implemented()
def update(self, other) -> None:  # noqa: PR01, RT01, D200
    pass  # pragma: no cover
Expand Down
108 changes: 108 additions & 0 deletions tests/integ/modin/frame/test_tz_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#

import modin.pandas as pd
import pandas as native_pd
import pytest
import pytz

import snowflake.snowpark.modin.plugin # noqa: F401
from tests.integ.modin.utils import eval_snowpark_pandas_result
from tests.integ.utils.sql_counter import sql_count_checker

# Shared ``tz`` parametrization: ``None`` first, then for every zone name below
# both its ``pytz`` timezone object and the plain string, in that order.
timezones = pytest.mark.parametrize(
    "tz",
    [None]
    + [
        tz_variant
        # A handful of zone names from each region prefix (Africa/, America/,
        # Asia/, ...), plus the US/* aliases and UTC.
        for zone_name in (
            "Africa/Abidjan",
            "Africa/Timbuktu",
            "America/Adak",
            "America/Yellowknife",
            "Antarctica/Casey",
            "Asia/Dhaka",
            "Asia/Manila",
            "Asia/Shanghai",
            "Atlantic/Stanley",
            "Australia/Sydney",
            "Canada/Pacific",
            "Europe/Chisinau",
            "Europe/Luxembourg",
            "Indian/Christmas",
            "Pacific/Chatham",
            "Pacific/Wake",
            "US/Arizona",
            "US/Central",
            "US/Eastern",
            "US/Hawaii",
            "US/Mountain",
            "US/Pacific",
            "UTC",
        )
        for tz_variant in (pytz.timezone(zone_name), zone_name)
    ],
)


@sql_count_checker(query_count=1)
@timezones
def test_tz_convert(tz):
    """Run ``DataFrame.tz_convert(tz)`` on a US/Eastern-indexed frame through
    ``eval_snowpark_pandas_result`` for both the Snowpark and native frames."""
    # Index mixes a nanosecond-precision timestamp, a date-only value and NaT.
    eastern_index = native_pd.DatetimeIndex(
        ["2014-04-04 23:56:01.000000001", "2015-04-03", pd.NaT],
        tz="US/Eastern",
    )
    rows = [[None, 2, 3], [4, None, 6], [7, 8, None]]
    native_df = native_pd.DataFrame(rows, index=eastern_index)
    snow_df = pd.DataFrame(native_df)

    def convert(df):
        return df.tz_convert(tz)

    eval_snowpark_pandas_result(snow_df, native_df, convert)


@pytest.mark.parametrize(
    "axis, level, copy, exception",
    [
        (1, None, None, NotImplementedError),
        ("columns", None, None, NotImplementedError),
        (0, 1, None, NotImplementedError),
        (0, None, False, NotImplementedError),
    ],
)
@sql_count_checker(query_count=0)
def test_tz_convert_negative(axis, level, copy, exception):
    """Each unsupported ``axis``/``level``/``copy`` combination must raise
    ``exception`` from ``DataFrame.tz_convert`` without issuing any query."""
    eastern_index = native_pd.DatetimeIndex(
        ["2014-04-04 23:56:01.000000001", "2015-04-03", pd.NaT],
        tz="US/Eastern",
    )
    rows = [[None, 2, 3], [4, None, 6], [7, 8, None]]
    snow_df = pd.DataFrame(native_pd.DataFrame(rows, index=eastern_index))

    unsupported_kwargs = {"axis": axis, "level": level, "copy": copy}
    with pytest.raises(exception):
        snow_df.tz_convert(tz="UTC", **unsupported_kwargs)
Loading
Loading