Skip to content

Teach AutoARIMAModel to work with out-sample predictions #830

Merged
merged 9 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
-
-
- Teach AutoARIMAModel to work with out-sample predictions ([#830](https://github.com/tinkoff-ai/etna/pull/830))
-
-
-
Expand Down
133 changes: 7 additions & 126 deletions etna/models/autoarima.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import warnings
from typing import List
from typing import Optional
from typing import Sequence

import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.arima import ARIMA
from statsmodels.tools.sm_exceptions import ValueWarning
from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper

from etna.models.base import BaseAdapter
from etna.models.base import PerSegmentPredictionIntervalModel
from etna.models.sarimax import _SARIMAXBaseAdapter

warnings.filterwarnings(
message="No frequency information was provided, so inferred frequency .* will be used",
Expand All @@ -20,7 +16,7 @@
)


class _AutoARIMAAdapter(BaseAdapter):
class _AutoARIMAAdapter(_SARIMAXBaseAdapter):
"""
Class for holding auto arima model.

Expand All @@ -45,126 +41,11 @@ def __init__(
Training parameters for auto_arima from pmdarima package.
"""
self.kwargs = kwargs
self._model: Optional[ARIMA] = None
self.regressor_columns: List[str] = []
super().__init__()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be before self.kwargs I guess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? In class AutoARIMAModel(PerSegmentPredictionIntervalModel) we set self.kwargs before calling super constructor.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By mistake, no?
It seems the logic in the parent class could reset attributes on self, so it's better to call super().__init__() first.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And vice versa: logic in the current class can reset state set by the base class and break something.


def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_AutoARIMAAdapter":
    """
    Fits auto ARIMA model.

    Parameters
    ----------
    df:
        Features dataframe with ``target`` and ``timestamp`` columns.
        Categorical columns are cast to int in place.
    regressors:
        List of the columns with regressors

    Returns
    -------
    :
        Fitted model

    Raises
    ------
    ValueError:
        If some categorical column can not be converted to int.
    """
    self.regressor_columns = regressors
    categorical_cols = df.select_dtypes(include=["category"]).columns.tolist()
    # Nothing to cast when there are no categorical columns.
    if categorical_cols:
        try:
            df.loc[:, categorical_cols] = df[categorical_cols].astype(int)
        except ValueError as err:
            raise ValueError(
                f"Categorical columns {categorical_cols} can not been converted to int.\n "
                "Try to encode this columns manually."
            ) from err

    self._check_df(df)

    # Re-index an explicit copy of the target by timestamp and hand exactly that
    # series to the model. Re-reading df["target"] only yields the re-indexed data
    # when pandas returns the same cached Series object, which is not guaranteed.
    targets = df["target"].copy()
    targets.index = df["timestamp"]

    exog_train = self._select_regressors(df)

    self._model = pm.auto_arima(targets, X=exog_train, **self.kwargs)
    return self

def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequence[float]) -> pd.DataFrame:
    """
    Compute predictions from auto ARIMA model.

    Parameters
    ----------
    df:
        Features dataframe; its length is used as the forecast horizon.
        Categorical columns are cast to int in place.
    prediction_interval:
        If True returns prediction interval for forecast
    quantiles:
        Levels of prediction distribution

    Returns
    -------
    :
        DataFrame with ``target`` and ``timestamp`` columns, plus one
        ``target_<quantile>`` column per requested quantile when
        ``prediction_interval`` is True. The index is reset.

    Raises
    ------
    ValueError:
        If the model is not fitted or a categorical column can not be converted to int.
    """
    if self._model is None:
        raise ValueError("AutoARIMA model is not fitted! Fit the model before calling predict method!")
    steps = len(df)
    self._check_df(df, steps)

    cat_columns = df.select_dtypes(include=["category"]).columns.tolist()
    try:
        df.loc[:, cat_columns] = df[cat_columns].astype(int)
    except ValueError:
        raise ValueError(
            f"Categorical columns {cat_columns} can not been converted to int.\n "
            "Try to encode this columns manually."
        )

    exog = self._select_regressors(df)

    # The point forecast is needed in both modes, so build it once up front.
    point_forecast = self._model.predict(steps, X=exog)
    result = pd.DataFrame({"target": point_forecast, "timestamp": df["timestamp"]})

    if prediction_interval:
        # Each quantile q maps to the alpha of the two-sided interval whose border it is.
        alphas = np.unique([2 * q if q < 0.5 else 2 * (1 - q) for q in quantiles])
        for alpha in alphas:
            borders = self._model.predict(steps, X=exog, return_conf_int=True, alpha=alpha)[1]
            lower_level = alpha / 2
            upper_level = 1 - alpha / 2
            if lower_level in quantiles:
                result[f"target_{lower_level:.4g}"] = borders[:, :1]
            if upper_level in quantiles:
                result[f"target_{upper_level:.4g}"] = borders[:, 1:]

    return result.reset_index(drop=True, inplace=False)

def _check_df(self, df: pd.DataFrame, horizon: Optional[int] = None):
column_to_drop = [col for col in df.columns if col not in ["target", "timestamp"] + self.regressor_columns]
if column_to_drop:
warnings.warn(
message=f"AutoARIMA model does not work with exogenous features (features unknown in future).\n "
f"{column_to_drop} will be dropped"
)
if horizon:
short_regressors = [regressor for regressor in self.regressor_columns if df[regressor].count() < horizon]
if short_regressors:
raise ValueError(
f"Regressors {short_regressors} are too short for chosen horizon value.\n "
"Try lower horizon value, or drop this regressors."
)

def _select_regressors(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
if self.regressor_columns:
exog_future = df[self.regressor_columns]
exog_future.index = df["timestamp"]
else:
exog_future = None
return exog_future

def get_model(self) -> ARIMA:
    """Return the pmdarima ARIMA model wrapped by this etna adapter.

    Returns
    -------
    :
        The object stored in ``self._model``
    """
    return self._model
def _get_fit_results(self, endog: pd.Series, exog: pd.DataFrame) -> SARIMAXResultsWrapper:
    """Run ``pm.auto_arima`` on the given data and return the model's ``arima_res_``.

    Parameters
    ----------
    endog:
        Series passed to ``auto_arima`` as the data to fit
    exog:
        Dataframe passed to ``auto_arima`` as ``X``

    Returns
    -------
    :
        The ``arima_res_`` attribute of the model returned by ``auto_arima``
    """
    fitted_arima = pm.auto_arima(endog, X=exog, **self.kwargs)
    return fitted_arima.arima_res_


class AutoARIMAModel(PerSegmentPredictionIntervalModel):
Expand Down
Loading