apache · villebro · Feb 16, 2021 · Feb 15, 2021 · robdiciuccio · Feb 17, 2021
diff --git a/superset/common/query_context.py b/superset/common/query_context.py
@@ -17,7 +17,6 @@
 import copy
 import logging
 import math
-from datetime import timedelta
 from typing import Any, cast, ClassVar, Dict, List, Optional, Union
 
 import numpy as np
@@ -112,17 +111,12 @@ def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
         # If the datetime format is unix, the parse will use the corresponding
         # parsing logic
         if not df.empty:
-            if DTTM_ALIAS in df.columns:
-                if timestamp_format in ("epoch_s", "epoch_ms"):
-                    # Column has already been formatted as a timestamp.
-                    df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
-                else:
-                    df[DTTM_ALIAS] = pd.to_datetime(
-                        df[DTTM_ALIAS], utc=False, format=timestamp_format
-                    )
-                if self.datasource.offset:
-                    df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
-                df[DTTM_ALIAS] += query_object.time_shift
+            df = utils.normalize_dttm_col(
+                df=df,
+                timestamp_format=timestamp_format,
+                offset=self.datasource.offset,
+                time_shift=query_object.time_shift,
+            )
 
             if self.enforce_numerical_metrics:
                 self.df_metrics_to_num(df, query_object)

diff --git a/superset/utils/core.py b/superset/utils/core.py
@@ -76,6 +76,7 @@
 from flask_babel import gettext as __
 from flask_babel.speaklater import LazyString
 from pandas.api.types import infer_dtype
+from pandas.core.dtypes.common import is_numeric_dtype
 from sqlalchemy import event, exc, select, Text
 from sqlalchemy.dialects.mysql import MEDIUMTEXT
 from sqlalchemy.engine import Connection, Engine
@@ -1579,3 +1580,48 @@ def format_list(items: Sequence[str], sep: str = ", ", quote: str = '"') -> str:
 def find_duplicates(items: Iterable[InputType]) -> List[InputType]:
     """Find duplicate items in an iterable."""
     return [item for item, count in collections.Counter(items).items() if count > 1]
+
+
+def normalize_dttm_col(
+    df: pd.DataFrame,
+    timestamp_format: Optional[str],
+    offset: int,
+    time_shift: Optional[timedelta],
+) -> pd.DataFrame:
+    """
+    Normalize the ``DTTM_ALIAS`` column of a dataframe into pandas datetimes.
+
+    A dataframe without a ``DTTM_ALIAS`` column is returned as is; otherwise the
+    conversion is done on a copy, so the dataframe passed in is left unchanged.
+
+    :param df: dataframe that may contain a ``DTTM_ALIAS`` column
+    :param timestamp_format: ``epoch_s`` or ``epoch_ms`` to read numeric values as
+        seconds/milliseconds since the unix epoch (non-numeric values go through
+        ``pd.Timestamp``); anything else is handed to ``pd.to_datetime`` as format
+    :param offset: hours to add to the column, skipped when falsy
+    :param time_shift: timedelta to add to the column, skipped when ``None``
+    :return: the dataframe itself, or a copy with the normalized column
+    """
+    if DTTM_ALIAS not in df.columns:
+        return df
+
+    normalized = df.copy()
+    raw_values = normalized[DTTM_ALIAS]
+    if timestamp_format not in ("epoch_s", "epoch_ms"):
+        converted = pd.to_datetime(raw_values, utc=False, format=timestamp_format)
+    elif is_numeric_dtype(raw_values):
+        # Numeric epoch: "epoch_s" maps to unit "s", "epoch_ms" to unit "ms".
+        epoch_unit = timestamp_format.replace("epoch_", "")
+        converted = pd.to_datetime(
+            raw_values, utc=False, unit=epoch_unit, origin="unix"
+        )
+    else:
+        # Non-numeric values are converted one at a time via pd.Timestamp.
+        converted = raw_values.apply(pd.Timestamp)
+    normalized[DTTM_ALIAS] = converted
+
+    if offset:
+        normalized[DTTM_ALIAS] += timedelta(hours=offset)
+    if time_shift is not None:
+        normalized[DTTM_ALIAS] += time_shift
+    return normalized
diff --git a/superset/viz.py b/superset/viz.py
@@ -284,33 +284,12 @@ def get_df(self, query_obj: Optional[QueryObjectDict] = None) -> pd.DataFrame:
         # If the datetime format is unix, the parse will use the corresponding
         # parsing logic.
         if not df.empty:
-            if DTTM_ALIAS in df.columns:
-                if timestamp_format in ("epoch_s", "epoch_ms"):
-                    # Column has already been formatted as a timestamp.
-                    dttm_col = df[DTTM_ALIAS]
-                    one_ts_val = dttm_col[0]
-
-                    # convert time column to pandas Timestamp, but different
-                    # ways to convert depending on string or int types
-                    try:
-                        int(one_ts_val)
-                        is_integral = True
-                    except (ValueError, TypeError):
-                        is_integral = False
-                    if is_integral:
-                        unit = "s" if timestamp_format == "epoch_s" else "ms"
-                        df[DTTM_ALIAS] = pd.to_datetime(
-                            dttm_col, utc=False, unit=unit, origin="unix"
-                        )
-                    else:
-                        df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
-                else:
-                    df[DTTM_ALIAS] = pd.to_datetime(
-                        df[DTTM_ALIAS], utc=False, format=timestamp_format
-                    )
-                if self.datasource.offset:
-                    df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
-                df[DTTM_ALIAS] += self.time_shift
+            df = utils.normalize_dttm_col(
+                df=df,
+                timestamp_format=timestamp_format,
+                offset=self.datasource.offset,
+                time_shift=self.time_shift,
+            )
 
             if self.enforce_numerical_metrics:
                 self.df_metrics_to_num(df)

diff --git a/tests/utils_tests.py b/tests/utils_tests.py
@@ -45,6 +45,7 @@
     cast_to_num,
     convert_legacy_filters_into_adhoc,
     create_ssl_cert_file,
+    DTTM_ALIAS,
     format_timedelta,
     GenericDataType,
     get_form_data_token,
@@ -59,6 +60,7 @@
     merge_extra_filters,
     merge_extra_form_data,
     merge_request_params,
+    normalize_dttm_col,
     parse_ssl_cert,
     parse_js_uri_path_item,
     extract_dataframe_dtypes,
@@ -1131,3 +1133,29 @@ def test_extract_dataframe_dtypes(self):
 
         df = pd.DataFrame(data={col[0]: col[2] for col in cols})
         assert extract_dataframe_dtypes(df) == [col[1] for col in cols]
+
+    def test_normalize_dttm_col(self):
+        """normalize_dttm_col parses, offsets and shifts the DTTM_ALIAS column."""
+
+        def first_dttm(frame, timestamp_format, offset=0, time_shift=None):
+            result = normalize_dttm_col(frame, timestamp_format, offset, time_shift)
+            return result[DTTM_ALIAS][0]
+
+        ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)
+
+        # a column that already holds timestamps is kept as is by every format
+        df = pd.DataFrame([{"__timestamp": ts, "a": 1}])
+        for timestamp_format in (None, "epoch_ms", "epoch_s"):
+            assert first_dttm(df, timestamp_format) == ts
+
+        # the offset is a number of hours
+        assert first_dttm(df, None, offset=1) == pd.Timestamp(2021, 2, 15, 20, 0, 0, 0)
+
+        # offset and time shift add up
+        shifted = first_dttm(df, None, offset=1, time_shift=timedelta(minutes=30))
+        assert shifted == pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
+
+        # numeric epoch values, in seconds then in milliseconds
+        for timestamp_format, factor in (("epoch_s", 1), ("epoch_ms", 1000)):
+            df = pd.DataFrame([{"__timestamp": ts.timestamp() * factor, "a": 1}])
+            assert first_dttm(df, timestamp_format) == ts