SNOW-1677888, SNOW-1677890: Add support for DataFrame.tz_convert and …

…Series.tz_convert
snowflakedb · Oct 4, 2024 · c2b8cd9 · c2b8cd9
1 parent 05ca10e
commit c2b8cd9
Show file tree

Hide file tree

Showing 11 changed files with 388 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,7 @@
 - Added suppport for applying `rolling().count()` and `expanding().count()` to `Timedelta` series and columns.
 - Added support for `tz` in both `pd.date_range` and `pd.bdate_range`.
 - Added support for `Series.items`.
+- Added support for `DataFrame.tz_convert` and `Series.tz_convert`.
 
 #### Improvements
 

diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst
@@ -481,7 +481,7 @@ Methods
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``truncate``                | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``tz_convert``              | N                               |                                  |                                                    |
+| ``tz_convert``              | P                               | ``axis``, ``level``, ``copy``    |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``tz_localize``             | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst
@@ -457,7 +457,7 @@ Methods
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``truncate``                | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``tz_convert``              | N                               |                                  |                                                    |
+| ``tz_convert``              | P                               | ``axis``, ``level``, ``copy``    |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``tz_localize``             | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/frame.py b/src/snowflake/snowpark/modin/plugin/_internal/frame.py
@@ -1255,25 +1255,38 @@ def update_snowflake_quoted_identifiers_with_expressions(
     def apply_snowpark_function_to_columns(
         self,
         snowpark_func: Callable[[Any], SnowparkColumn],
+        include_data: bool = True,
         include_index: bool = False,
         return_type: Optional[SnowparkPandasType] = None,
     ) -> "InternalFrame":
         """
-        Apply snowpark function callable to all data columns of an InternalFrame. If
-        include_index is True also apply this function to all index columns. The
-        snowflake quoted identifiers are preserved.
+        Apply snowpark function callable to all data columns and/or all index columns of an InternalFrame.
+        If include_data is True, apply the function to all data columns.
+        If include_index is True, apply the function to all index columns.
+        Raise an error if both include_data and include_index are False.
+        The snowflake quoted identifiers are preserved.
 
         Arguments:
             snowpark_func: Snowpark function to apply to columns of underlying snowpark df.
-            return_type: The optional SnowparkPandasType for the new column.
+            include_data: Whether to apply the function to data columns.
             include_index: Whether to apply the function to index columns as well.
+            return_type: The optional SnowparkPandasType for the new column.
 
         Returns:
-            InternalFrame with snowpark_func applies to columns of original frame, all other columns remain unchanged.
+            InternalFrame with snowpark_func applied to columns of original frame, all other columns remain unchanged.
         """
-        snowflake_ids = self.data_column_snowflake_quoted_identifiers
-        if include_index:
+
+        assert (
+            include_data or include_index
+        ), "Internal error: Cannoy exclude both of data columns and index columns."
+        if include_data and include_index:
+            snowflake_ids = self.data_column_snowflake_quoted_identifiers
             snowflake_ids.extend(self.index_column_snowflake_quoted_identifiers)
+        elif include_data:
+            snowflake_ids = self.data_column_snowflake_quoted_identifiers
+        else:
+            assert include_index
+            snowflake_ids = self.index_column_snowflake_quoted_identifiers
 
         return self.update_snowflake_quoted_identifiers_with_expressions(
             {col_id: snowpark_func(col(col_id)) for col_id in snowflake_ids},

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -11684,7 +11684,7 @@ def dt_property(
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                property_function, include_index
+                property_function, include_index=include_index
             )
         )
 
@@ -17243,7 +17243,7 @@ def dt_tz_localize(
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
                 lambda column: tz_localize_column(column, tz),
-                include_index,
+                include_index=include_index,
             )
         )
 
@@ -17265,7 +17265,7 @@ def dt_tz_convert(
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
                 lambda column: tz_convert_column(column, tz),
-                include_index,
+                include_index=include_index,
             )
         )
 
@@ -17346,7 +17346,9 @@ def ceil_func(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                ceil_func, include_index, return_type
+                ceil_func,
+                include_index=include_index,
+                return_type=return_type,
             )
         )
 
@@ -17505,7 +17507,9 @@ def round_func(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                round_func, include_index, return_type
+                round_func,
+                include_index=include_index,
+                return_type=return_type,
             )
         )
 
@@ -17578,7 +17582,9 @@ def floor_func(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                floor_func, include_index, return_type
+                floor_func,
+                include_index=include_index,
+                return_type=return_type,
             ),
         )
 
@@ -17600,7 +17606,8 @@ def normalize_column(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                normalize_column, include_index
+                normalize_column,
+                include_index=include_index,
             )
         )
 
@@ -17633,7 +17640,8 @@ def month_name_func(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                month_name_func, include_index
+                month_name_func,
+                include_index=include_index,
             )
         )
 
@@ -17666,7 +17674,8 @@ def day_name_func(column: SnowparkColumn) -> SnowparkColumn:
 
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(
-                day_name_func, include_index
+                day_name_func,
+                include_index=include_index,
             )
         )
 
@@ -17688,7 +17697,7 @@ def dt_total_seconds(self, include_index: bool = False) -> "SnowflakeQueryCompil
             self._modin_frame.apply_snowpark_function_to_columns(
                 # Cast the column to decimal of scale 9 to ensure no precision loss.
                 lambda x: x.cast(DecimalType(scale=9)) / 1_000_000_000,
-                include_index,
+                include_index=include_index,
             )
         )
 
@@ -18766,8 +18775,52 @@ def compare(
 
         return result
 
-    def tz_convert(self, *args: Any, **kwargs: Any) -> None:
-        ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")
+    def tz_convert(
+        self,
+        tz: Union[str, tzinfo],
+        axis: int = 0,
+        level: Optional[Level] = None,
+        copy: bool = True,
+    ) -> "SnowflakeQueryCompiler":
+        """
+        Convert tz-aware axis to target time zone.
+
+        Parameters
+        ----------
+        tz : str or tzinfo object or None
+            Target time zone. Passing None will convert to UTC and remove the timezone information.
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            The axis to convert
+        level : int, str, default None
+            If axis is a MultiIndex, convert a specific level. Otherwise must be None.
+        copy : bool, default True
+            Also make a copy of the underlying data.
+
+        Returns
+        -------
+        SnowflakeQueryCompiler
+            The result of applying time zone conversion.
+        """
+        if axis in (1, "columns"):
+            ErrorMessage.not_implemented(
+                f"Snowpark pandas 'tz_convert' method doesn't yet support 'axis={axis}'"
+            )
+        if level is not None:
+            ErrorMessage.not_implemented(
+                "Snowpark pandas 'tz_convert' method doesn't yet support the 'level' parameter"
+            )
+        if copy is not True:
+            ErrorMessage.not_implemented(
+                "Snowpark pandas 'tz_convert' method doesn't support 'copy=False'"
+            )
+
+        return SnowflakeQueryCompiler(
+            self._modin_frame.apply_snowpark_function_to_columns(
+                lambda column: tz_convert_column(column, tz),
+                include_data=False,
+                include_index=True,
+            )
+        )
 
     def tz_localize(self, *args: Any, **kwargs: Any) -> None:
         ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")
@@ -18815,5 +18868,8 @@ def timedelta_property(
                 f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'"
             )  # pragma: no cover
         return SnowflakeQueryCompiler(
-            self._modin_frame.apply_snowpark_function_to_columns(func, include_index)
+            self._modin_frame.apply_snowpark_function_to_columns(
+                func,
+                include_index=include_index,
+            )
         )
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
@@ -4179,6 +4179,52 @@ def to_timestamp():
         Cast to DatetimeIndex of timestamps, at *beginning* of period.
         """
 
+    def tz_convert():
+        """
+        Convert tz-aware axis to target time zone.
+
+        Parameters
+        ----------
+        tz : str or tzinfo object or None
+            Target time zone. Passing None will convert to UTC and remove the timezone information.
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            The axis to convert
+        level : int, str, default None
+            If axis is a MultiIndex, convert a specific level. Otherwise must be None.
+        copy : bool, default True
+            Also make a copy of the underlying data.
+
+        Returns
+        -------
+        Series/DataFrame
+            Object with time zone converted axis.
+
+        Raises
+        ------
+        TypeError
+            If the axis is tz-naive.
+
+        Examples
+        --------
+        Change to another time zone:
+
+        >>> s = pd.Series(
+        ...     [1],
+        ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
+        ... )
+        >>> s.tz_convert('Asia/Shanghai')
+        2018-09-15 07:30:00+08:00    1
+        Freq: None, dtype: int64
+
+        Pass None to convert to UTC and get a tz-naive index:
+
+        >>> s = pd.Series([1],
+        ...             index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
+        >>> s.tz_convert(None)
+        2018-09-14 23:30:00    1
+        Freq: None, dtype: int64
+        """
+
     def truediv():
         """
         Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`).

diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
@@ -3425,6 +3425,52 @@ def truncate():
         Truncate a Series before and after some index value.
         """
 
+    def tz_convert():
+        """
+        Convert tz-aware axis to target time zone.
+
+        Parameters
+        ----------
+        tz : str or tzinfo object or None
+            Target time zone. Passing None will convert to UTC and remove the timezone information.
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            The axis to convert
+        level : int, str, default None
+            If axis is a MultiIndex, convert a specific level. Otherwise must be None.
+        copy : bool, default True
+            Also make a copy of the underlying data.
+
+        Returns
+        -------
+        Series/DataFrame
+            Object with time zone converted axis.
+
+        Raises
+        ------
+        TypeError
+            If the axis is tz-naive.
+
+        Examples
+        --------
+        Change to another time zone:
+
+        >>> s = pd.Series(
+        ...     [1],
+        ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
+        ... )
+        >>> s.tz_convert('Asia/Shanghai')
+        2018-09-15 07:30:00+08:00    1
+        Freq: None, dtype: int64
+
+        Pass None to convert to UTC and get a tz-naive index:
+
+        >>> s = pd.Series([1],
+        ...             index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
+        >>> s.tz_convert(None)
+        2018-09-14 23:30:00    1
+        Freq: None, dtype: int64
+        """
+
     def unique():
         """
         Return unique values of Series object.

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py
@@ -490,11 +490,6 @@ def truncate(
     pass  # pragma: no cover
 
 
-@register_base_not_implemented()
-def tz_convert(self, tz, axis=0, level=None, copy=True):  # noqa: PR01, RT01, D200
-    pass  # pragma: no cover
-
-
 @register_base_not_implemented()
 def tz_localize(
     self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise"