Skip to content

Commit

Permalink
fix: to_datetime in Pandas 2
Browse files Browse the repository at this point in the history
  • Loading branch information
betodealmeida committed Aug 10, 2023
1 parent ce65a3b commit b497b6e
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
10 changes: 8 additions & 2 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1834,7 +1834,12 @@ def normalize_dttm_col(
# Column is formatted as a numeric value
unit = _col.timestamp_format.replace("epoch_", "")
df[_col.col_label] = pd.to_datetime(
dttm_series, utc=False, unit=unit, origin="unix", errors="coerce"
dttm_series,
utc=False,
unit=unit,
origin="unix",
errors="raise",
exact=False,
)
else:
# Column has already been formatted as a timestamp.
Expand All @@ -1844,7 +1849,8 @@ def normalize_dttm_col(
df[_col.col_label],
utc=False,
format=_col.timestamp_format,
errors="coerce",
errors="raise",
exact=False,
)
if _col.offset:
df[_col.col_label] += timedelta(hours=_col.offset)
Expand Down
30 changes: 30 additions & 0 deletions tests/unit_tests/utils/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@
import os
from typing import Any, Optional

import pandas as pd
import pytest

from superset.utils.core import (
cast_to_boolean,
DateColumn,
is_test,
normalize_dttm_col,
parse_boolean_string,
QueryObjectFilterClause,
remove_extra_adhoc_filters,
Expand Down Expand Up @@ -171,3 +174,30 @@ def test_other_values():
assert cast_to_boolean([]) is False
assert cast_to_boolean({}) is False
assert cast_to_boolean(object()) is False


def test_normalize_dttm_col() -> None:
    """
    Tests for the ``normalize_dttm_col`` function.

    In particular, this covers a regression when Pandas was upgraded from 1.5.3 to
    2.0.3 and the behavior of ``pd.to_datetime`` changed.
    """
    df = pd.DataFrame({"__time": ["2017-07-01T00:00:00.000Z"]})

    # Sanity-check the fixture before normalizing: the column still holds the raw
    # ISO-8601 string. The expected table must keep its column padding, since the
    # comparison against ``to_markdown()`` output is exact.
    assert (
        df.to_markdown()
        == """
|    | __time                   |
|---:|:-------------------------|
|  0 | 2017-07-01T00:00:00.000Z |
    """.strip()
    )

    # in 1.5.3 this would return a datetime64[ns] dtype, but in 2.0.3 we had to
    # add ``exact=False`` since there is a leftover after parsing the format:
    # "%Y-%m-%d" only consumes the date prefix of the value, leaving
    # "T00:00:00.000Z" unparsed
    dttm_cols = (DateColumn("__time", "%Y-%m-%d"),)

    # the function modifies the dataframe in place
    normalize_dttm_col(df, dttm_cols)

    # only the date part matched by the format should survive normalization
    assert df["__time"].astype(str).tolist() == ["2017-07-01"]

0 comments on commit b497b6e

Please sign in to comment.