diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e6bb7f72b..c99ea5fc7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ - Added support for `Series.dt.weekday`, `Series.dt.time`, and `DatetimeIndex.time`. - Added support for `Index.min` and `Index.max`. - Added support for `pd.merge_asof`. +- Added support for `Series.dt.normalize` and `DatetimeIndex.normalize`. #### Bug Fixes diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 2b3dc406ad..eb23e556ab 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -270,6 +270,7 @@ Series Series.dt.floor Series.dt.ceil Series.dt.round + Series.dt.normalize .. rubric:: String accessor methods diff --git a/docs/source/modin/supported/datetime_index_supported.rst b/docs/source/modin/supported/datetime_index_supported.rst index 3cbc32a9f1..ccc553caaf 100644 --- a/docs/source/modin/supported/datetime_index_supported.rst +++ b/docs/source/modin/supported/datetime_index_supported.rst @@ -76,7 +76,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | DataFrame method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``normalize`` | N | | | +| ``normalize`` | Y | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``strftime`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_dt_supported.rst b/docs/source/modin/supported/series_dt_supported.rst index 981f3ba012..eb0a1c4732 100644 --- a/docs/source/modin/supported/series_dt_supported.rst +++ b/docs/source/modin/supported/series_dt_supported.rst @@ -84,7 +84,7 @@ the method in the left column. +-----------------------------+---------------------------------+----------------------------------------------------+ | ``tz_convert`` | N | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``normalize`` | N | | +| ``normalize`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``strftime`` | N | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index feecf5cd57..9e53c77a07 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -16339,17 +16339,32 @@ def dt_floor( ) ) - def dt_normalize(self) -> None: + def dt_normalize(self, include_index: bool = False) -> "SnowflakeQueryCompiler": """ Set the time component of each date-time value to midnight. + Args: + include_index: Whether to include the index columns in the operation. + Returns ------- BaseQueryCompiler New QueryCompiler containing date-time values with midnight time. """ - ErrorMessage.not_implemented( - "Snowpark pandas doesn't yet support the method 'Series.dt.normalize'" + internal_frame = self._modin_frame + + def normalize_column(col_id: str) -> SnowparkColumn: + return builtin("date_trunc")("d", col(col_id)) + + snowflake_ids = internal_frame.data_column_snowflake_quoted_identifiers[0:1] + if include_index: + snowflake_ids.extend( + internal_frame.index_column_snowflake_quoted_identifiers + ) + return SnowflakeQueryCompiler( + internal_frame.update_snowflake_quoted_identifiers_with_expressions( + {col_id: normalize_column(col_id) for col_id in snowflake_ids} + ).frame ) def dt_month_name( diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py index 384ac868ac..802bdf1665 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py @@ -957,7 +957,45 @@ def swapcase(): pass def normalize(): - pass + """ + Convert times to midnight. + + + The time component of the date-time is converted to midnight i.e. 00:00:00. This is useful in cases, when the time does not matter. Length is unaltered. The timezones are unaffected. + + + This method is available on Series with datetime values under the .dt accessor, and directly on Datetime Array/Index. + + + Returns + ------- + DatetimeArray, DatetimeIndex or Series + The same type as the original data. Series will have the same name and index. DatetimeIndex will have the same name. + + See also + -------- + floor + Floor the datetimes to the specified freq. + ceil + Ceil the datetimes to the specified freq. + round + Round the datetimes to the specified freq. + + Examples + -------- + >>> idx = pd.date_range(start='2014-08-01 10:00', freq='h', + ... periods=3, tz='Asia/Calcutta') # doctest: +SKIP + >>> idx # doctest: +SKIP + DatetimeIndex(['2014-08-01 10:00:00+05:30', + '2014-08-01 11:00:00+05:30', + '2014-08-01 12:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + >>> idx.normalize() # doctest: +SKIP + DatetimeIndex(['2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + """ def translate(): """ diff --git a/src/snowflake/snowpark/modin/plugin/extensions/datetime_index.py b/src/snowflake/snowpark/modin/plugin/extensions/datetime_index.py index f27ec2d848..af12c554c2 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/datetime_index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/datetime_index.py @@ -787,7 +787,6 @@ def indexer_between_time( array([0, 1]) """ - @datetime_index_not_implemented() def normalize(self) -> DatetimeIndex: """ Convert times to midnight. @@ -826,6 +825,9 @@ def normalize(self) -> DatetimeIndex: '2014-08-01 00:00:00+05:30'], dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ + return DatetimeIndex( + query_compiler=self._query_compiler.dt_normalize(include_index=True) + ) @datetime_index_not_implemented() def strftime(self, date_format: str) -> np.ndarray[np.object_]: diff --git a/tests/integ/modin/index/test_datetime_index_methods.py b/tests/integ/modin/index/test_datetime_index_methods.py index b0aaeab76a..d7bfee4d32 100644 --- a/tests/integ/modin/index/test_datetime_index_methods.py +++ b/tests/integ/modin/index/test_datetime_index_methods.py @@ -205,3 +205,15 @@ def test_day_month_name_negative(method): msg = f"Snowpark pandas method DatetimeIndex.{method} does not yet support the 'locale' parameter" with pytest.raises(NotImplementedError, match=msg): getattr(snow_index, method)(locale="pt_BR.utf8") + + +@sql_count_checker(query_count=1) +def test_normalize(): + native_index = native_pd.date_range(start="2021-01-01", periods=5, freq="7h") + native_index = native_index.append(native_pd.DatetimeIndex([pd.NaT])) + snow_index = pd.DatetimeIndex(native_index) + eval_snowpark_pandas_result( + snow_index, + native_index, + lambda i: i.normalize(), + ) diff --git a/tests/integ/modin/series/test_dt_accessor.py b/tests/integ/modin/series/test_dt_accessor.py index eee63667a2..a7094d5a83 100644 --- a/tests/integ/modin/series/test_dt_accessor.py +++ b/tests/integ/modin/series/test_dt_accessor.py @@ -159,6 +159,19 @@ def test_floor_ceil_round_negative(func, freq, ambiguous, nonexistent): ) +@sql_count_checker(query_count=1) +def test_normalize(): + date_range = native_pd.date_range(start="2021-01-01", periods=5, freq="7h") + native_ser = native_pd.Series(date_range) + native_ser.iloc[2] = native_pd.NaT + snow_ser = pd.Series(native_ser) + eval_snowpark_pandas_result( + snow_ser, + native_ser, + lambda s: s.dt.normalize(), + ) + + def test_isocalendar(): with SqlCounter(query_count=1): date_range = native_pd.date_range("2020-05-01", periods=5, freq="4D") diff --git a/tests/unit/modin/test_series_dt.py b/tests/unit/modin/test_series_dt.py index 405ae18207..d1f2b52bce 100644 --- a/tests/unit/modin/test_series_dt.py +++ b/tests/unit/modin/test_series_dt.py @@ -38,7 +38,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler: (lambda s: s.dt.to_period(), "to_period"), (lambda s: s.dt.tz_localize(tz="UTC"), "tz_localize"), (lambda s: s.dt.tz_convert(tz="UTC"), "tz_convert"), - (lambda s: s.dt.normalize(), "normalize"), (lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"), (lambda s: s.dt.total_seconds(), "total_seconds"), (lambda s: s.dt.seconds, "seconds"),