diff --git a/CHANGELOG.md b/CHANGELOG.md index 159495f150a..62344969a52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ - Added suppport for applying `rolling().count()` and `expanding().count()` to `Timedelta` series and columns. - Added support for `tz` in both `pd.date_range` and `pd.bdate_range`. - Added support for `Series.items`. +- Added support for `DataFrame.tz_convert` and `Series.tz_convert`. #### Improvements diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst index a63fc4ee5ba..469661be4c4 100644 --- a/docs/source/modin/supported/dataframe_supported.rst +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -481,7 +481,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``truncate`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``tz_convert`` | N | | | +| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``tz_localize`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst index f3c51828e0b..34b0d8754c1 100644 --- a/docs/source/modin/supported/series_supported.rst +++ b/docs/source/modin/supported/series_supported.rst @@ -457,7 +457,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``truncate`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``tz_convert`` | N | | | +| ``tz_convert`` | P | ``axis``, ``level``, ``copy`` | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``tz_localize`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/plugin/_internal/frame.py b/src/snowflake/snowpark/modin/plugin/_internal/frame.py index 550771f4848..4764b3d0e6c 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/frame.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/frame.py @@ -1255,25 +1255,38 @@ def update_snowflake_quoted_identifiers_with_expressions( def apply_snowpark_function_to_columns( self, snowpark_func: Callable[[Any], SnowparkColumn], + include_data: bool = True, include_index: bool = False, return_type: Optional[SnowparkPandasType] = None, ) -> "InternalFrame": """ - Apply snowpark function callable to all data columns of an InternalFrame. If - include_index is True also apply this function to all index columns. The - snowflake quoted identifiers are preserved. + Apply snowpark function callable to all data columns and/or all index columns of an InternalFrame. + If include_data is True, apply the function to all data columns. + If include_index is True, apply the function to all index columns. + Raise an error if both include_data and include_index are False. + The snowflake quoted identifiers are preserved. Arguments: snowpark_func: Snowpark function to apply to columns of underlying snowpark df. - return_type: The optional SnowparkPandasType for the new column. + include_data: Whether to apply the function to data columns. include_index: Whether to apply the function to index columns as well. + return_type: The optional SnowparkPandasType for the new column. Returns: - InternalFrame with snowpark_func applies to columns of original frame, all other columns remain unchanged. + InternalFrame with snowpark_func applied to columns of original frame, all other columns remain unchanged. """ - snowflake_ids = self.data_column_snowflake_quoted_identifiers - if include_index: + + assert ( + include_data or include_index + ), "Internal error: Cannoy exclude both of data columns and index columns." + if include_data and include_index: + snowflake_ids = self.data_column_snowflake_quoted_identifiers snowflake_ids.extend(self.index_column_snowflake_quoted_identifiers) + elif include_data: + snowflake_ids = self.data_column_snowflake_quoted_identifiers + else: + assert include_index + snowflake_ids = self.index_column_snowflake_quoted_identifiers return self.update_snowflake_quoted_identifiers_with_expressions( {col_id: snowpark_func(col(col_id)) for col_id in snowflake_ids}, diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index bf500508554..21cc043eee0 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -11684,7 +11684,7 @@ def dt_property( return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - property_function, include_index + property_function, include_index=include_index ) ) @@ -17243,7 +17243,7 @@ def dt_tz_localize( return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( lambda column: tz_localize_column(column, tz), - include_index, + include_index=include_index, ) ) @@ -17265,7 +17265,7 @@ def dt_tz_convert( return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( lambda column: tz_convert_column(column, tz), - include_index, + include_index=include_index, ) ) @@ -17346,7 +17346,9 @@ def ceil_func(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - ceil_func, include_index, return_type + ceil_func, + include_index=include_index, + return_type=return_type, ) ) @@ -17505,7 +17507,9 @@ def round_func(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - round_func, include_index, return_type + round_func, + include_index=include_index, + return_type=return_type, ) ) @@ -17578,7 +17582,9 @@ def floor_func(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - floor_func, include_index, return_type + floor_func, + include_index=include_index, + return_type=return_type, ), ) @@ -17600,7 +17606,8 @@ def normalize_column(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - normalize_column, include_index + normalize_column, + include_index=include_index, ) ) @@ -17633,7 +17640,8 @@ def month_name_func(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - month_name_func, include_index + month_name_func, + include_index=include_index, ) ) @@ -17666,7 +17674,8 @@ def day_name_func(column: SnowparkColumn) -> SnowparkColumn: return SnowflakeQueryCompiler( self._modin_frame.apply_snowpark_function_to_columns( - day_name_func, include_index + day_name_func, + include_index=include_index, ) ) @@ -17688,7 +17697,7 @@ def dt_total_seconds(self, include_index: bool = False) -> "SnowflakeQueryCompil self._modin_frame.apply_snowpark_function_to_columns( # Cast the column to decimal of scale 9 to ensure no precision loss. lambda x: x.cast(DecimalType(scale=9)) / 1_000_000_000, - include_index, + include_index=include_index, ) ) @@ -18766,8 +18775,52 @@ def compare( return result - def tz_convert(self, *args: Any, **kwargs: Any) -> None: - ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset") + def tz_convert( + self, + tz: Union[str, tzinfo], + axis: int = 0, + level: Optional[Level] = None, + copy: bool = True, + ) -> "SnowflakeQueryCompiler": + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object or None + Target time zone. Passing None will convert to UTC and remove the timezone information. + axis : {0 or ‘index’, 1 or ‘columns’}, default 0 + The axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + SnowflakeQueryCompiler + The result of applying time zone conversion. + """ + if axis in (1, "columns"): + ErrorMessage.not_implemented( + f"Snowpark pandas 'tz_convert' method doesn't yet support 'axis={axis}'" + ) + if level is not None: + ErrorMessage.not_implemented( + "Snowpark pandas 'tz_convert' method doesn't yet support the 'level' parameter" + ) + if copy is not True: + ErrorMessage.not_implemented( + "Snowpark pandas 'tz_convert' method doesn't support 'copy=False'" + ) + + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + lambda column: tz_convert_column(column, tz), + include_data=False, + include_index=True, + ) + ) def tz_localize(self, *args: Any, **kwargs: Any) -> None: ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset") @@ -18815,5 +18868,8 @@ def timedelta_property( f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'" ) # pragma: no cover return SnowflakeQueryCompiler( - self._modin_frame.apply_snowpark_function_to_columns(func, include_index) + self._modin_frame.apply_snowpark_function_to_columns( + func, + include_index=include_index, + ) ) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index c7b11c8fbea..b27b8182ef3 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -4179,6 +4179,52 @@ def to_timestamp(): Cast to DatetimeIndex of timestamps, at *beginning* of period. """ + def tz_convert(): + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object or None + Target time zone. Passing None will convert to UTC and remove the timezone information. + axis : {0 or ‘index’, 1 or ‘columns’}, default 0 + The axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + Series/DataFrame + Object with time zone converted axis. + + Raises + ------ + TypeError + If the axis is tz-naive. + + Examples + -------- + Change to another time zone: + + >>> s = pd.Series( + ... [1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']), + ... ) + >>> s.tz_convert('Asia/Shanghai') + 2018-09-15 07:30:00+08:00 1 + Freq: None, dtype: int64 + + Pass None to convert to UTC and get a tz-naive index: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) + >>> s.tz_convert(None) + 2018-09-14 23:30:00 1 + Freq: None, dtype: int64 + """ + def truediv(): """ Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`). diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index ccdd2e2f5a0..c39dd39570d 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3425,6 +3425,52 @@ def truncate(): Truncate a Series before and after some index value. """ + def tz_convert(): + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object or None + Target time zone. Passing None will convert to UTC and remove the timezone information. + axis : {0 or ‘index’, 1 or ‘columns’}, default 0 + The axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + Series/DataFrame + Object with time zone converted axis. + + Raises + ------ + TypeError + If the axis is tz-naive. + + Examples + -------- + Change to another time zone: + + >>> s = pd.Series( + ... [1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']), + ... ) + >>> s.tz_convert('Asia/Shanghai') + 2018-09-15 07:30:00+08:00 1 + Freq: None, dtype: int64 + + Pass None to convert to UTC and get a tz-naive index: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) + >>> s.tz_convert(None) + 2018-09-14 23:30:00 1 + Freq: None, dtype: int64 + """ + def unique(): """ Return unique values of Series object. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 8288e58c97d..72fb2b69488 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -490,11 +490,6 @@ def truncate( pass # pragma: no cover -@register_base_not_implemented() -def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 - pass # pragma: no cover - - @register_base_not_implemented() def tz_localize( self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" diff --git a/tests/integ/modin/frame/test_tz_convert.py b/tests/integ/modin/frame/test_tz_convert.py new file mode 100644 index 00000000000..de82c2ed137 --- /dev/null +++ b/tests/integ/modin/frame/test_tz_convert.py @@ -0,0 +1,102 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import pandas as native_pd +import pytest +import pytz + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.utils import eval_snowpark_pandas_result +from tests.integ.utils.sql_counter import sql_count_checker + +timezones = pytest.mark.parametrize( + "tz", + [ + None, + # Use a subset of pytz.common_timezones containing a few timezones in each + *[ + param_for_one_tz + for tz in [ + "Africa/Abidjan", + "Africa/Timbuktu", + "America/Adak", + "America/Yellowknife", + "Antarctica/Casey", + "Asia/Dhaka", + "Asia/Manila", + "Asia/Shanghai", + "Atlantic/Stanley", + "Australia/Sydney", + "Canada/Pacific", + "Europe/Chisinau", + "Europe/Luxembourg", + "Indian/Christmas", + "Pacific/Chatham", + "Pacific/Wake", + "US/Arizona", + "US/Central", + "US/Eastern", + "US/Hawaii", + "US/Mountain", + "US/Pacific", + "UTC", + ] + for param_for_one_tz in ( + pytz.timezone(tz), + tz, + ) + ], + ], +) + + +@sql_count_checker(query_count=1) +@timezones +def test_tz_convert(tz): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + pd.NaT, + ], + tz="US/Eastern", + ) + native_df = native_pd.DataFrame([[1, 2]], datetime_index) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: df.tz_convert(tz), + ) + + +@pytest.mark.parametrize( + "axis, level, copy, exception", + [ + (1, None, None, NotImplementedError), + ("columns", None, None, NotImplementedError), + (0, 1, None, NotImplementedError), + (0, None, False, NotImplementedError), + ], +) +@sql_count_checker(query_count=0) +def test_tz_convert_negative(axis, level, copy, exception): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + pd.NaT, + ], + tz="US/Eastern", + ) + native_df = native_pd.DataFrame([[1, 2]], datetime_index) + snow_df = pd.DataFrame(native_df) + + with pytest.raises(exception): + snow_df.tz_convert( + tz="UTC", + axis=axis, + level=level, + copy=copy, + ) diff --git a/tests/integ/modin/series/test_tz_convert.py b/tests/integ/modin/series/test_tz_convert.py new file mode 100644 index 00000000000..be20f46102a --- /dev/null +++ b/tests/integ/modin/series/test_tz_convert.py @@ -0,0 +1,102 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import pandas as native_pd +import pytest +import pytz + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.utils import eval_snowpark_pandas_result +from tests.integ.utils.sql_counter import sql_count_checker + +timezones = pytest.mark.parametrize( + "tz", + [ + None, + # Use a subset of pytz.common_timezones containing a few timezones in each + *[ + param_for_one_tz + for tz in [ + "Africa/Abidjan", + "Africa/Timbuktu", + "America/Adak", + "America/Yellowknife", + "Antarctica/Casey", + "Asia/Dhaka", + "Asia/Manila", + "Asia/Shanghai", + "Atlantic/Stanley", + "Australia/Sydney", + "Canada/Pacific", + "Europe/Chisinau", + "Europe/Luxembourg", + "Indian/Christmas", + "Pacific/Chatham", + "Pacific/Wake", + "US/Arizona", + "US/Central", + "US/Eastern", + "US/Hawaii", + "US/Mountain", + "US/Pacific", + "UTC", + ] + for param_for_one_tz in ( + pytz.timezone(tz), + tz, + ) + ], + ], +) + + +@sql_count_checker(query_count=1) +@timezones +def test_tz_convert(tz): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + pd.NaT, + ], + tz="US/Eastern", + ) + native_ser = native_pd.Series([1, 2], datetime_index) + snow_ser = pd.Series(native_ser) + + eval_snowpark_pandas_result( + snow_ser, + native_ser, + lambda s: s.tz_convert(tz), + ) + + +@pytest.mark.parametrize( + "axis, level, copy, exception", + [ + (1, None, None, ValueError), + ("columns", None, None, ValueError), + (0, 1, None, NotImplementedError), + (0, None, False, NotImplementedError), + ], +) +@sql_count_checker(query_count=0) +def test_tz_convert_negative(axis, level, copy, exception): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + pd.NaT, + ], + tz="US/Eastern", + ) + native_ser = native_pd.Series([1, 2], datetime_index) + snow_ser = pd.Series(native_ser) + + with pytest.raises(exception): + snow_ser.tz_convert( + tz="UTC", + axis=axis, + level=level, + copy=copy, + ) diff --git a/tests/unit/modin/test_unsupported.py b/tests/unit/modin/test_unsupported.py index dfd70e725e5..25f25a4cdd6 100644 --- a/tests/unit/modin/test_unsupported.py +++ b/tests/unit/modin/test_unsupported.py @@ -118,7 +118,6 @@ def test_unsupported_general(general_method, kwargs): ["to_xml", {}], ["transform", {"func": [[], {}]}], ["truncate", {}], - ["tz_convert", {"tz": ""}], ["tz_localize", {"tz": ""}], ["xs", {"key": ""}], ["__dataframe__", {}], @@ -194,7 +193,6 @@ def test_unsupported_df(df_method, kwargs): ["to_xarray", {}], ["transform", {"func": ""}], ["truncate", {}], - ["tz_convert", {"tz": ""}], ["tz_localize", {"tz": ""}], ["view", {}], ["xs", {"key": ""}],