Skip to content

Commit

Permalink
SNOW-1677888, SNOW-1677890: Add support for DataFrame.tz_convert and …
Browse files Browse the repository at this point in the history
…Series.tz_convert
  • Loading branch information
sfc-gh-helmeleegy committed Oct 4, 2024
1 parent 05ca10e commit c2b8cd9
Show file tree
Hide file tree
Showing 11 changed files with 388 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
- Added suppport for applying `rolling().count()` and `expanding().count()` to `Timedelta` series and columns.
- Added support for `tz` in both `pd.date_range` and `pd.bdate_range`.
- Added support for `Series.items`.
- Added support for `DataFrame.tz_convert` and `Series.tz_convert`.

#### Improvements

Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/dataframe_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``truncate`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | | |
| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_localize`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/series_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``truncate`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | | |
| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``tz_localize`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
Expand Down
27 changes: 20 additions & 7 deletions src/snowflake/snowpark/modin/plugin/_internal/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,25 +1255,38 @@ def update_snowflake_quoted_identifiers_with_expressions(
def apply_snowpark_function_to_columns(
self,
snowpark_func: Callable[[Any], SnowparkColumn],
include_data: bool = True,
include_index: bool = False,
return_type: Optional[SnowparkPandasType] = None,
) -> "InternalFrame":
"""
Apply snowpark function callable to all data columns of an InternalFrame. If
include_index is True also apply this function to all index columns. The
snowflake quoted identifiers are preserved.
Apply snowpark function callable to all data columns and/or all index columns of an InternalFrame.
If include_data is True, apply the function to all data columns.
If include_index is True, apply the function to all index columns.
Raise an error if both include_data and include_index are False.
The snowflake quoted identifiers are preserved.
Arguments:
snowpark_func: Snowpark function to apply to columns of underlying snowpark df.
return_type: The optional SnowparkPandasType for the new column.
include_data: Whether to apply the function to data columns.
include_index: Whether to apply the function to index columns as well.
return_type: The optional SnowparkPandasType for the new column.
Returns:
InternalFrame with snowpark_func applies to columns of original frame, all other columns remain unchanged.
InternalFrame with snowpark_func applied to columns of original frame, all other columns remain unchanged.
"""
snowflake_ids = self.data_column_snowflake_quoted_identifiers
if include_index:

assert (
include_data or include_index
), "Internal error: Cannoy exclude both of data columns and index columns."
if include_data and include_index:
snowflake_ids = self.data_column_snowflake_quoted_identifiers
snowflake_ids.extend(self.index_column_snowflake_quoted_identifiers)
elif include_data:
snowflake_ids = self.data_column_snowflake_quoted_identifiers
else:
assert include_index
snowflake_ids = self.index_column_snowflake_quoted_identifiers

return self.update_snowflake_quoted_identifiers_with_expressions(
{col_id: snowpark_func(col(col_id)) for col_id in snowflake_ids},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11684,7 +11684,7 @@ def dt_property(

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
property_function, include_index
property_function, include_index=include_index
)
)

Expand Down Expand Up @@ -17243,7 +17243,7 @@ def dt_tz_localize(
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_localize_column(column, tz),
include_index,
include_index=include_index,
)
)

Expand All @@ -17265,7 +17265,7 @@ def dt_tz_convert(
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_convert_column(column, tz),
include_index,
include_index=include_index,
)
)

Expand Down Expand Up @@ -17346,7 +17346,9 @@ def ceil_func(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
ceil_func, include_index, return_type
ceil_func,
include_index=include_index,
return_type=return_type,
)
)

Expand Down Expand Up @@ -17505,7 +17507,9 @@ def round_func(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
round_func, include_index, return_type
round_func,
include_index=include_index,
return_type=return_type,
)
)

Expand Down Expand Up @@ -17578,7 +17582,9 @@ def floor_func(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
floor_func, include_index, return_type
floor_func,
include_index=include_index,
return_type=return_type,
),
)

Expand All @@ -17600,7 +17606,8 @@ def normalize_column(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
normalize_column, include_index
normalize_column,
include_index=include_index,
)
)

Expand Down Expand Up @@ -17633,7 +17640,8 @@ def month_name_func(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
month_name_func, include_index
month_name_func,
include_index=include_index,
)
)

Expand Down Expand Up @@ -17666,7 +17674,8 @@ def day_name_func(column: SnowparkColumn) -> SnowparkColumn:

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
day_name_func, include_index
day_name_func,
include_index=include_index,
)
)

Expand All @@ -17688,7 +17697,7 @@ def dt_total_seconds(self, include_index: bool = False) -> "SnowflakeQueryCompil
self._modin_frame.apply_snowpark_function_to_columns(
# Cast the column to decimal of scale 9 to ensure no precision loss.
lambda x: x.cast(DecimalType(scale=9)) / 1_000_000_000,
include_index,
include_index=include_index,
)
)

Expand Down Expand Up @@ -18766,8 +18775,52 @@ def compare(

return result

def tz_convert(self, *args: Any, **kwargs: Any) -> None:
ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")
def tz_convert(
self,
tz: Union[str, tzinfo],
axis: int = 0,
level: Optional[Level] = None,
copy: bool = True,
) -> "SnowflakeQueryCompiler":
"""
Convert tz-aware axis to target time zone.

Parameters
----------
tz : str or tzinfo object or None
Target time zone. Passing None will convert to UTC and remove the timezone information.
axis : {0 or ‘index’, 1 or ‘columns’}, default 0
The axis to convert
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise must be None.
copy : bool, default True
Also make a copy of the underlying data.

Returns
-------
SnowflakeQueryCompiler
The result of applying time zone conversion.
"""
if axis in (1, "columns"):
ErrorMessage.not_implemented(
f"Snowpark pandas 'tz_convert' method doesn't yet support 'axis={axis}'"
)
if level is not None:
ErrorMessage.not_implemented(
"Snowpark pandas 'tz_convert' method doesn't yet support the 'level' parameter"
)
if copy is not True:
ErrorMessage.not_implemented(
"Snowpark pandas 'tz_convert' method doesn't support 'copy=False'"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_convert_column(column, tz),
include_data=False,
include_index=True,
)
)

def tz_localize(self, *args: Any, **kwargs: Any) -> None:
ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")
Expand Down Expand Up @@ -18815,5 +18868,8 @@ def timedelta_property(
f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'"
) # pragma: no cover
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(func, include_index)
self._modin_frame.apply_snowpark_function_to_columns(
func,
include_index=include_index,
)
)
46 changes: 46 additions & 0 deletions src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4179,6 +4179,52 @@ def to_timestamp():
Cast to DatetimeIndex of timestamps, at *beginning* of period.
"""

def tz_convert():
"""
Convert tz-aware axis to target time zone.
Parameters
----------
tz : str or tzinfo object or None
Target time zone. Passing None will convert to UTC and remove the timezone information.
axis : {0 or ‘index’, 1 or ‘columns’}, default 0
The axis to convert
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise must be None.
copy : bool, default True
Also make a copy of the underlying data.
Returns
-------
Series/DataFrame
Object with time zone converted axis.
Raises
------
TypeError
If the axis is tz-naive.
Examples
--------
Change to another time zone:
>>> s = pd.Series(
... [1],
... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
... )
>>> s.tz_convert('Asia/Shanghai')
2018-09-15 07:30:00+08:00 1
Freq: None, dtype: int64
Pass None to convert to UTC and get a tz-naive index:
>>> s = pd.Series([1],
... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
>>> s.tz_convert(None)
2018-09-14 23:30:00 1
Freq: None, dtype: int64
"""

def truediv():
"""
Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`).
Expand Down
46 changes: 46 additions & 0 deletions src/snowflake/snowpark/modin/plugin/docstrings/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3425,6 +3425,52 @@ def truncate():
Truncate a Series before and after some index value.
"""

def tz_convert():
"""
Convert tz-aware axis to target time zone.
Parameters
----------
tz : str or tzinfo object or None
Target time zone. Passing None will convert to UTC and remove the timezone information.
axis : {0 or ‘index’, 1 or ‘columns’}, default 0
The axis to convert
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise must be None.
copy : bool, default True
Also make a copy of the underlying data.
Returns
-------
Series/DataFrame
Object with time zone converted axis.
Raises
------
TypeError
If the axis is tz-naive.
Examples
--------
Change to another time zone:
>>> s = pd.Series(
... [1],
... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
... )
>>> s.tz_convert('Asia/Shanghai')
2018-09-15 07:30:00+08:00 1
Freq: None, dtype: int64
Pass None to convert to UTC and get a tz-naive index:
>>> s = pd.Series([1],
... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
>>> s.tz_convert(None)
2018-09-14 23:30:00 1
Freq: None, dtype: int64
"""

def unique():
"""
Return unique values of Series object.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -490,11 +490,6 @@ def truncate(
pass # pragma: no cover


@register_base_not_implemented()
def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200
pass # pragma: no cover


@register_base_not_implemented()
def tz_localize(
self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise"
Expand Down
Loading

0 comments on commit c2b8cd9

Please sign in to comment.