diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d2d1ce84..703f17aa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added +- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164)) - Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158)) - Target components logic to TSDataset ([#1153](https://github.com/tinkoff-ai/etna/pull/1153)) - Methods `save` and `load` to HierarchicalPipeline ([#1096](https://github.com/tinkoff-ai/etna/pull/1096)) diff --git a/etna/models/linear.py b/etna/models/linear.py index 3da5fb348..2a09b2b72 100644 --- a/etna/models/linear.py +++ b/etna/models/linear.py @@ -1,12 +1,52 @@ +import pandas as pd from sklearn.linear_model import ElasticNet from sklearn.linear_model import LinearRegression -from etna.models.sklearn import SklearnMultiSegmentModel -from etna.models.sklearn import SklearnPerSegmentModel +from etna.models.base import NonPredictionIntervalContextIgnorantAbstractModel +from etna.models.mixins import MultiSegmentModelMixin +from etna.models.mixins import NonPredictionIntervalContextIgnorantModelMixin +from etna.models.mixins import PerSegmentModelMixin +from etna.models.sklearn import _SklearnAdapter -class LinearPerSegmentModel(SklearnPerSegmentModel): - """Class holding per segment :py:class:`sklearn.linear_model.LinearRegression`.""" +class _LinearAdapter(_SklearnAdapter): + def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: + """Estimate prediction components. + + Parameters + ---------- + df: + features dataframe + + Returns + ------- + : + dataframe with prediction components + """ + if self.regressor_columns is None: + raise ValueError("Model is not fitted! 
Fit the model before estimating forecast components!") + + components_coefs = self.model.coef_ + target_components = df[self.model.feature_names_in_].apply(pd.to_numeric) + target_components = components_coefs * target_components + if self.model.fit_intercept: + target_components["intercept"] = self.model.intercept_ + target_components = target_components.add_prefix("target_component_") + return target_components + + +class LinearPerSegmentModel( + PerSegmentModelMixin, + NonPredictionIntervalContextIgnorantModelMixin, + NonPredictionIntervalContextIgnorantAbstractModel, +): + """ + Class holding per segment :py:class:`sklearn.linear_model.LinearRegression`. + + Notes + ----- + Target components are formed as the terms from the linear regression formula. + """ def __init__(self, fit_intercept: bool = True, **kwargs): """ @@ -20,11 +60,23 @@ def __init__(self, fit_intercept: bool = True, **kwargs): """ self.fit_intercept = fit_intercept self.kwargs = kwargs - super().__init__(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs)) + super().__init__( + base_model=_LinearAdapter(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs)) + ) + +class ElasticPerSegmentModel( + PerSegmentModelMixin, + NonPredictionIntervalContextIgnorantModelMixin, + NonPredictionIntervalContextIgnorantAbstractModel, +): + """ + Class holding per segment :py:class:`sklearn.linear_model.ElasticNet`. -class ElasticPerSegmentModel(SklearnPerSegmentModel): - """Class holding per segment :py:class:`sklearn.linear_model.ElasticNet`.""" + Notes + ----- + Target components are formed as the terms from the linear regression formula.
+ """ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: bool = True, **kwargs): """ @@ -55,17 +107,29 @@ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: boo self.fit_intercept = fit_intercept self.kwargs = kwargs super().__init__( - regressor=ElasticNet( - alpha=self.alpha, - l1_ratio=self.l1_ratio, - fit_intercept=self.fit_intercept, - **self.kwargs, + base_model=_LinearAdapter( + regressor=ElasticNet( + alpha=self.alpha, + l1_ratio=self.l1_ratio, + fit_intercept=self.fit_intercept, + **self.kwargs, + ) ) ) -class LinearMultiSegmentModel(SklearnMultiSegmentModel): - """Class holding :py:class:`sklearn.linear_model.LinearRegression` for all segments.""" +class LinearMultiSegmentModel( + MultiSegmentModelMixin, + NonPredictionIntervalContextIgnorantModelMixin, + NonPredictionIntervalContextIgnorantAbstractModel, +): + """ + Class holding :py:class:`sklearn.linear_model.LinearRegression` for all segments. + + Notes + ----- + Target components are formed as the terms from the linear regression formula. + """ def __init__(self, fit_intercept: bool = True, **kwargs): """ @@ -79,11 +143,23 @@ def __init__(self, fit_intercept: bool = True, **kwargs): """ self.fit_intercept = fit_intercept self.kwargs = kwargs - super().__init__(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs)) + super().__init__( + base_model=_LinearAdapter(regressor=LinearRegression(fit_intercept=self.fit_intercept, **self.kwargs)) + ) + +class ElasticMultiSegmentModel( + MultiSegmentModelMixin, + NonPredictionIntervalContextIgnorantModelMixin, + NonPredictionIntervalContextIgnorantAbstractModel, +): + """ + Class holding :py:class:`sklearn.linear_model.ElasticNet` for all segments. -class ElasticMultiSegmentModel(SklearnMultiSegmentModel): - """Class holding :py:class:`sklearn.linear_model.ElasticNet` for all segments.""" + Notes + ----- + Target components are formed as the terms from the linear regression formula.
+ """ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: bool = True, **kwargs): """ @@ -114,10 +190,12 @@ def __init__(self, alpha: float = 1.0, l1_ratio: float = 0.5, fit_intercept: boo self.fit_intercept = fit_intercept self.kwargs = kwargs super().__init__( - regressor=ElasticNet( - alpha=self.alpha, - l1_ratio=self.l1_ratio, - fit_intercept=self.fit_intercept, - **self.kwargs, + base_model=_LinearAdapter( + regressor=ElasticNet( + alpha=self.alpha, + l1_ratio=self.l1_ratio, + fit_intercept=self.fit_intercept, + **self.kwargs, + ) ) ) diff --git a/etna/models/sklearn.py b/etna/models/sklearn.py index dd1893d39..e2388cd02 100644 --- a/etna/models/sklearn.py +++ b/etna/models/sklearn.py @@ -63,6 +63,21 @@ def predict(self, df: pd.DataFrame) -> np.ndarray: pred = self.model.predict(features) return pred + def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: + """Estimate prediction components. + + Parameters + ---------- + df: + features dataframe + + Returns + ------- + : + dataframe with prediction components + """ + raise NotImplementedError("Prediction decomposition isn't currently implemented!") + def get_model(self) -> RegressorMixin: """Get internal sklearn model that is used inside etna class. 
diff --git a/tests/test_models/test_linear_model.py b/tests/test_models/test_linear_model.py index c325940fe..339af16dc 100644 --- a/tests/test_models/test_linear_model.py +++ b/tests/test_models/test_linear_model.py @@ -1,3 +1,6 @@ +from typing import List +from typing import Tuple + import numpy as np import pandas as pd import pytest @@ -9,6 +12,7 @@ from etna.models.linear import ElasticPerSegmentModel from etna.models.linear import LinearMultiSegmentModel from etna.models.linear import LinearPerSegmentModel +from etna.models.linear import _LinearAdapter from etna.pipeline import Pipeline from etna.transforms.math import LagTransform from etna.transforms.timestamp import DateFlagsTransform @@ -42,6 +46,14 @@ def ts_with_categoricals(random_seed) -> TSDataset: return ts +@pytest.fixture +def df_with_regressors(example_tsds) -> Tuple[pd.DataFrame, List[str]]: + lags = LagTransform(in_column="target", lags=[7], out_column="lag") + dflg = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True, is_weekend=False, out_column="df") + example_tsds.fit_transform([lags, dflg]) + return example_tsds.to_pandas(flatten=True).dropna(), example_tsds.regressors + + def linear_segments_by_parameters(alpha_values, intercept_values): dates = pd.date_range(start="2020-02-01", freq="D", periods=210) x = np.arange(210) @@ -269,3 +281,47 @@ def test_save_load(model, example_tsds): horizon = 3 transforms = [LagTransform(in_column="target", lags=list(range(horizon, horizon + 3)))] assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon) + + +@pytest.mark.parametrize( + "fit_intercept, expected_component_names", + [ + ( + True, + [ + "target_component_lag_7", + "target_component_df_day_number_in_week", + "target_component_df_day_number_in_month", + "target_component_intercept", + ], + ), + ( + False, + [ + "target_component_lag_7", + "target_component_df_day_number_in_week", + "target_component_df_day_number_in_month", + ], + 
), + ], +) +@pytest.mark.parametrize("regressor_constructor", (LinearRegression, ElasticNet)) +def test_linear_adapter_predict_components_correct_names( + df_with_regressors, regressor_constructor, fit_intercept, expected_component_names +): + df, regressors = df_with_regressors + adapter = _LinearAdapter(regressor=regressor_constructor(fit_intercept=fit_intercept)) + adapter.fit(df=df, regressors=regressors) + target_components = adapter.predict_components(df) + assert sorted(target_components.columns) == sorted(expected_component_names) + + +@pytest.mark.parametrize("fit_intercept", (True, False)) +@pytest.mark.parametrize("regressor_constructor", (LinearRegression, ElasticNet)) +def test_linear_adapter_predict_components_sum_up_to_target(df_with_regressors, regressor_constructor, fit_intercept): + df, regressors = df_with_regressors + adapter = _LinearAdapter(regressor=regressor_constructor(fit_intercept=fit_intercept)) + adapter.fit(df=df, regressors=regressors) + target = adapter.predict(df) + target_components = adapter.predict_components(df) + np.testing.assert_array_almost_equal(target, target_components.sum(axis=1), decimal=10)