Skip to content

Implement forecast decomposition for linear models #1164

Merged
merged 9 commits into from
Mar 16, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
- Target components logic to TSDataset ([#1153](https://github.com/tinkoff-ai/etna/pull/1153))
- Methods `save` and `load` to HierarchicalPipeline ([#1096](https://github.com/tinkoff-ai/etna/pull/1096))
Expand Down
122 changes: 100 additions & 22 deletions etna/models/linear.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,52 @@
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression

from etna.models.sklearn import SklearnMultiSegmentModel
from etna.models.sklearn import SklearnPerSegmentModel
from etna.models.base import NonPredictionIntervalContextIgnorantAbstractModel
from etna.models.mixins import MultiSegmentModelMixin
from etna.models.mixins import NonPredictionIntervalContextIgnorantModelMixin
from etna.models.mixins import PerSegmentModelMixin
from etna.models.sklearn import _SklearnAdapter


class LinearPerSegmentModel(SklearnPerSegmentModel):
"""Class holding per segment :py:class:`sklearn.linear_model.LinearRegression`."""
class _LinearAdapter(_SklearnAdapter):
    """Sklearn adapter for linear regressors that can split a prediction into additive terms."""

    def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
        """Decompose the linear prediction into one term per feature.

        Every feature column is multiplied by its fitted coefficient; when the
        wrapped model has ``fit_intercept`` enabled, the intercept is added as
        one more constant column.

        Parameters
        ----------
        df:
            features dataframe; must contain the columns listed in
            ``self.model.feature_names_in_``

        Returns
        -------
        :
            dataframe with prediction components, each column named
            ``target_component_<feature>`` (plus ``target_component_intercept``
            if the intercept is used)

        Raises
        ------
        ValueError:
            if ``self.regressor_columns`` is ``None``, i.e. the adapter was not fitted
        """
        if self.regressor_columns is None:
            raise ValueError("Model is not fitted! Fit the model before estimating forecast components!")

        estimator = self.model
        coefficients = estimator.coef_
        # Features are coerced to numeric before being scaled by the coefficients.
        numeric_features = df[estimator.feature_names_in_].apply(pd.to_numeric)
        components = coefficients * numeric_features
        if estimator.fit_intercept:
            components["intercept"] = estimator.intercept_
        return components.add_prefix("target_component_")


class LinearPerSegmentModel(
PerSegmentModelMixin,
NonPredictionIntervalContextIgnorantModelMixin,
NonPredictionIntervalContextIgnorantAbstractModel,
):
"""
Class holding per segment :py:class:`sklearn.linear_model.LinearRegression`.

Notes
-----
Target components are formed as the terms from linear regression formula.
"""

def __init__(self, fit_intercept: bool = True, **kwargs):
"""
Expand All @@ -20,11 +60,23 @@ def __init__(self, fit_intercept: bool = True, **kwargs):
"""
self.fit_intercept = fit_intercept
self.kwargs = kwargs
super().__init__(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs))
super().__init__(
base_model=_LinearAdapter(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs))
)


class ElasticPerSegmentModel(
PerSegmentModelMixin,
NonPredictionIntervalContextIgnorantModelMixin,
NonPredictionIntervalContextIgnorantAbstractModel,
):
"""
Class holding per segment :py:class:`sklearn.linear_model.ElasticNet`.

class ElasticPerSegmentModel(SklearnPerSegmentModel):
"""Class holding per segment :py:class:`sklearn.linear_model.ElasticNet`."""
Notes
-----
Target components are formed as the terms from linear regression formula.
"""

def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: bool = True, **kwargs):
"""
Expand Down Expand Up @@ -55,17 +107,29 @@ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: boo
self.fit_intercept = fit_intercept
self.kwargs = kwargs
super().__init__(
regressor=ElasticNet(
alpha=self.alpha,
l1_ratio=self.l1_ratio,
fit_intercept=self.fit_intercept,
**self.kwargs,
base_model=_LinearAdapter(
regressor=ElasticNet(
alpha=self.alpha,
l1_ratio=self.l1_ratio,
fit_intercept=self.fit_intercept,
**self.kwargs,
)
)
)


class LinearMultiSegmentModel(SklearnMultiSegmentModel):
"""Class holding :py:class:`sklearn.linear_model.LinearRegression` for all segments."""
class LinearMultiSegmentModel(
MultiSegmentModelMixin,
NonPredictionIntervalContextIgnorantModelMixin,
NonPredictionIntervalContextIgnorantAbstractModel,
):
"""
Class holding :py:class:`sklearn.linear_model.LinearRegression` for all segments.

Notes
-----
Target components are formed as the terms from linear regression formula.
"""

def __init__(self, fit_intercept: bool = True, **kwargs):
"""
Expand All @@ -79,11 +143,23 @@ def __init__(self, fit_intercept: bool = True, **kwargs):
"""
self.fit_intercept = fit_intercept
self.kwargs = kwargs
super().__init__(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs))
super().__init__(
base_model=_LinearAdapter(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs))
)


class ElasticMultiSegmentModel(
MultiSegmentModelMixin,
NonPredictionIntervalContextIgnorantModelMixin,
NonPredictionIntervalContextIgnorantAbstractModel,
):
"""
Class holding :py:class:`sklearn.linear_model.ElasticNet` for all segments.

class ElasticMultiSegmentModel(SklearnMultiSegmentModel):
"""Class holding :py:class:`sklearn.linear_model.ElasticNet` for all segments."""
Notes
-----
Target components are formed as the terms from linear regression formula.
"""

def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: bool = True, **kwargs):
"""
Expand Down Expand Up @@ -114,10 +190,12 @@ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: boo
self.fit_intercept = fit_intercept
self.kwargs = kwargs
super().__init__(
regressor=ElasticNet(
alpha=self.alpha,
l1_ratio=self.l1_ratio,
fit_intercept=self.fit_intercept,
**self.kwargs,
base_model=_LinearAdapter(
regressor=ElasticNet(
alpha=self.alpha,
l1_ratio=self.l1_ratio,
fit_intercept=self.fit_intercept,
**self.kwargs,
)
)
)
15 changes: 15 additions & 0 deletions etna/models/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ def predict(self, df: pd.DataFrame) -> np.ndarray:
pred = self.model.predict(features)
return pred

def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
    """Estimate prediction components.

    This implementation does not use ``df`` and always raises.

    Parameters
    ----------
    df:
        features dataframe

    Returns
    -------
    :
        dataframe with prediction components

    Raises
    ------
    NotImplementedError:
        always, prediction decomposition is not available here
    """
    message = "Prediction decomposition isn't currently implemented!"
    raise NotImplementedError(message)

def get_model(self) -> RegressorMixin:
"""Get internal sklearn model that is used inside etna class.

Expand Down
56 changes: 56 additions & 0 deletions tests/test_models/test_linear_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from typing import List
from typing import Tuple

import numpy as np
import pandas as pd
import pytest
Expand All @@ -9,6 +12,7 @@
from etna.models.linear import ElasticPerSegmentModel
from etna.models.linear import LinearMultiSegmentModel
from etna.models.linear import LinearPerSegmentModel
from etna.models.linear import _LinearAdapter
from etna.pipeline import Pipeline
from etna.transforms.math import LagTransform
from etna.transforms.timestamp import DateFlagsTransform
Expand Down Expand Up @@ -42,6 +46,14 @@ def ts_with_categoricals(random_seed) -> TSDataset:
return ts


@pytest.fixture
def df_with_regressors(example_tsds) -> Tuple[pd.DataFrame, List[str]]:
    """Return a flattened, NaN-free dataframe with lag/date-flag features and the dataset's regressors."""
    transforms = [
        LagTransform(in_column="target", lags=[7], out_column="lag"),
        DateFlagsTransform(day_number_in_week=True, day_number_in_month=True, is_weekend=False, out_column="df"),
    ]
    example_tsds.fit_transform(transforms)
    # Rows with missing values are dropped from the flattened frame.
    flat_df = example_tsds.to_pandas(flatten=True).dropna()
    return flat_df, example_tsds.regressors


def linear_segments_by_parameters(alpha_values, intercept_values):
dates = pd.date_range(start="2020-02-01", freq="D", periods=210)
x = np.arange(210)
Expand Down Expand Up @@ -269,3 +281,47 @@ def test_save_load(model, example_tsds):
horizon = 3
transforms = [LagTransform(in_column="target", lags=list(range(horizon, horizon + 3)))]
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon)


@pytest.mark.parametrize(
    "fit_intercept, expected_component_names",
    [
        (
            True,
            [
                "target_component_lag_7",
                "target_component_df_day_number_in_week",
                "target_component_df_day_number_in_month",
                "target_component_intercept",
            ],
        ),
        (
            False,
            [
                "target_component_lag_7",
                "target_component_df_day_number_in_week",
                "target_component_df_day_number_in_month",
            ],
        ),
    ],
)
@pytest.mark.parametrize("regressor_constructor", (LinearRegression, ElasticNet))
def test_linear_adapter_predict_components_correct_names(
    df_with_regressors, regressor_constructor, fit_intercept, expected_component_names
):
    """Component columns must match the expected prefixed names, order-insensitively."""
    features_df, regressor_names = df_with_regressors
    regressor = regressor_constructor(fit_intercept=fit_intercept)
    adapter = _LinearAdapter(regressor=regressor)
    adapter.fit(df=features_df, regressors=regressor_names)
    actual_names = sorted(adapter.predict_components(features_df).columns)
    assert actual_names == sorted(expected_component_names)


@pytest.mark.parametrize("fit_intercept", (True, False))
@pytest.mark.parametrize("regressor_constructor", (LinearRegression, ElasticNet))
def test_linear_adapter_predict_components_sum_up_to_target(df_with_regressors, regressor_constructor, fit_intercept):
    """The row-wise sum of the components must reproduce the adapter's prediction."""
    features_df, regressor_names = df_with_regressors
    regressor = regressor_constructor(fit_intercept=fit_intercept)
    adapter = _LinearAdapter(regressor=regressor)
    adapter.fit(df=features_df, regressors=regressor_names)
    predicted = adapter.predict(features_df)
    reconstructed = adapter.predict_components(features_df).sum(axis=1)
    np.testing.assert_array_almost_equal(predicted, reconstructed, decimal=10)