From 3642b01fedd20514b01b6428649a6de478accd5c Mon Sep 17 00:00:00 2001
From: alexbarros
Date: Fri, 25 Oct 2024 08:42:38 -0300
Subject: [PATCH] feat: add adfuller stationary test parameters to config

---
 src/ydata_profiling/config.py                            | 4 +++-
 .../model/pandas/describe_timeseries_pandas.py           | 9 ++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 3fdcef09c..5b0139338 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -1,7 +1,7 @@
 """Configuration for the package."""
 from enum import Enum
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 import yaml
 from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
@@ -112,6 +112,8 @@ class TimeseriesVars(BaseModel):
     lags: List[int] = [1, 7, 12, 24, 30]
     significance: float = 0.05
     pacf_acf_lag: int = 100
+    autolag: Optional[Literal["AIC", "BIC", "t-stat"]] = "AIC"
+    maxlag: Optional[int] = None
 
 
 class Univariate(BaseModel):
diff --git a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py b/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py
index 5ffe99a9f..0f2ca93a5 100644
--- a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py
+++ b/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py
@@ -16,12 +16,15 @@
 
 
 def stationarity_test(config: Settings, series: pd.Series) -> Tuple[bool, float]:
-    significance_threshold = config.vars.timeseries.significance
-
     # make sure the data has no missing values
-    adfuller_test = adfuller(series.dropna())
+    adfuller_test = adfuller(
+        series.dropna(),
+        autolag=config.vars.timeseries.autolag,
+        maxlag=config.vars.timeseries.maxlag
+    )
     p_value = adfuller_test[1]
 
+    significance_threshold = config.vars.timeseries.significance
     return p_value < significance_threshold, p_value
 
 