From 4a4ccedb8dfd69060e24628e4609c6233de89c21 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 07:59:05 +0300 Subject: [PATCH 01/13] Update methods in TSDataset --- etna/datasets/tsdataset.py | 110 ++++++++----------------------------- 1 file changed, 23 insertions(+), 87 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 917d1bc60..ce8c7b6b0 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -11,7 +11,6 @@ from typing import List from typing import Optional from typing import Sequence -from typing import Set from typing import Tuple from typing import Union @@ -25,7 +24,7 @@ from etna.loggers import tslogger if TYPE_CHECKING: - from etna.transforms.base import Transform + from etna.transforms.base import NewTransform if SETTINGS.torch_required: from torch.utils.data import Dataset @@ -137,29 +136,19 @@ def __init__( self.df_exog.index = pd.to_datetime(self.df_exog.index) self.df = self._merge_exog(self.df) - self.transforms: Optional[Sequence["Transform"]] = None - - def transform(self, transforms: Sequence["Transform"]): + def transform(self, transforms: Sequence["NewTransform"]): """Apply given transform to the data.""" self._check_endings(warning=True) - self.transforms = transforms - for transform in self.transforms: + for transform in transforms: tslogger.log(f"Transform {repr(transform)} is applied to dataset") - columns_before = set(self.columns.get_level_values("feature")) - self.df = transform.transform(self.df) - columns_after = set(self.columns.get_level_values("feature")) - self._update_regressors(transform=transform, columns_before=columns_before, columns_after=columns_after) + transform.transform(self) - def fit_transform(self, transforms: Sequence["Transform"]): + def fit_transform(self, transforms: Sequence["NewTransform"]): """Fit and apply given transforms to the data.""" self._check_endings(warning=True) - self.transforms = transforms - for transform in 
self.transforms: + for transform in transforms: tslogger.log(f"Transform {repr(transform)} is applied to dataset") - columns_before = set(self.columns.get_level_values("feature")) - self.df = transform.fit_transform(self.df) - columns_after = set(self.columns.get_level_values("feature")) - self._update_regressors(transform=transform, columns_before=columns_before, columns_after=columns_after) + transform.fit_transform(self) @staticmethod def _prepare_df(df: pd.DataFrame) -> pd.DataFrame: @@ -170,60 +159,6 @@ def _prepare_df(df: pd.DataFrame) -> pd.DataFrame: df_copy.columns = pd.MultiIndex.from_frame(columns_frame) return df_copy - def _update_regressors(self, transform: "Transform", columns_before: Set[str], columns_after: Set[str]): - from etna.transforms import OneHotEncoderTransform - from etna.transforms.base import FutureMixin - - # intersect list of regressors with columns after the transform - self._regressors = list(set(self._regressors).intersection(columns_after)) - - unseen_columns = list(columns_after - columns_before) - - if len(unseen_columns) == 0: - return - - new_regressors = [] - - if isinstance(transform, FutureMixin): - # Every column from FutureMixin is regressor - out_columns = list(columns_after - columns_before) - new_regressors = out_columns - elif isinstance(transform, OneHotEncoderTransform): - # Only the columns created with OneHotEncoderTransform from regressor are regressors - in_column = transform.in_column - out_columns = list(columns_after - columns_before) - if in_column in self.regressors: - new_regressors = out_columns - elif hasattr(transform, "in_column"): - # Only the columns created with the other transforms from regressors are regressors - in_columns = transform.in_column if isinstance(transform.in_column, list) else [transform.in_column] # type: ignore - if hasattr(transform, "out_columns") and transform.out_columns is not None: # type: ignore - # User defined out_columns in sklearn - # TODO: remove this case after fixing 
the out_column attribute in SklearnTransform - out_columns = transform.out_columns # type: ignore - regressors_in_column_ids = [i for i, in_column in enumerate(in_columns) if in_column in self.regressors] - new_regressors = [out_columns[i] for i in regressors_in_column_ids] - elif hasattr(transform, "out_column") and transform.out_column is not None: # type: ignore - # User defined out_columns - out_columns = transform.out_column if isinstance(transform.out_column, list) else [transform.out_column] # type: ignore - regressors_in_column_ids = [i for i, in_column in enumerate(in_columns) if in_column in self.regressors] - new_regressors = [out_columns[i] for i in regressors_in_column_ids] - else: - # Default out_columns - out_columns = list(columns_after - columns_before) - regressors_in_column = [in_column for in_column in in_columns if in_column in self.regressors] - new_regressors = [ - out_column - for out_column in out_columns - if np.any([regressor in out_column for regressor in regressors_in_column]) - ] - - else: - raise ValueError("Transform is not FutureMixin and does not have in_column attribute!") - - new_regressors = [regressor for regressor in new_regressors if regressor not in self.regressors] - self._regressors.extend(new_regressors) - def __repr__(self): return self.df.__repr__() @@ -243,13 +178,17 @@ def __getitem__(self, item): df = df.loc[first_valid_idx:] return df - def make_future(self, future_steps: int, tail_steps: int = 0) -> "TSDataset": + def make_future( + self, future_steps: int, transforms: Sequence["NewTransform"] = (), tail_steps: int = 0 + ) -> "TSDataset": """Return new TSDataset with future steps. Parameters ---------- future_steps: number of timestamp in the future to build features for. + transforms: + sequence of transforms to be applied. tail_steps: number of timestamp for context to build features for. 
@@ -307,10 +246,11 @@ def make_future(self, future_steps: int, tail_steps: int = 0) -> "TSDataset": f"NaN-s will be used for missing values" ) - if self.transforms is not None: - for transform in self.transforms: - tslogger.log(f"Transform {repr(transform)} is applied to dataset") - df = transform.transform(df) + ts = TSDataset(df=df, freq=self.freq) + for transform in transforms: + tslogger.log(f"Transform {repr(transform)} is applied to dataset") + transform.transform(ts) + df = ts.to_pandas() future_dataset = df.tail(future_steps + tail_steps).copy(deep=True) @@ -318,9 +258,8 @@ def make_future(self, future_steps: int, tail_steps: int = 0) -> "TSDataset": future_ts = TSDataset(df=future_dataset, freq=self.freq) # can't put known_future into constructor, _check_known_future fails with df_exog=None - future_ts.known_future = self.known_future - future_ts._regressors = self.regressors - future_ts.transforms = self.transforms + future_ts.known_future = deepcopy(self.known_future) + future_ts._regressors = deepcopy(self.regressors) future_ts.df_exog = self.df_exog return future_ts @@ -344,7 +283,6 @@ def tsdataset_idx_slice(self, start_idx: Optional[int] = None, end_idx: Optional # can't put known_future into constructor, _check_known_future fails with df_exog=None tsdataset_slice.known_future = deepcopy(self.known_future) tsdataset_slice._regressors = deepcopy(self.regressors) - tsdataset_slice.transforms = deepcopy(self.transforms) tsdataset_slice.df_exog = self.df_exog return tsdataset_slice @@ -425,16 +363,14 @@ def _check_endings(self, warning=False): else: raise ValueError("All segments should end at the same timestamp") - def inverse_transform(self): + def inverse_transform(self, transforms: Sequence["NewTransform"]): """Apply inverse transform method of transforms to the data. Applied in reversed order. 
""" - # TODO: return regressors after inverse_transform - if self.transforms is not None: - for transform in reversed(self.transforms): - tslogger.log(f"Inverse transform {repr(transform)} is applied to dataset") - self.df = transform.inverse_transform(self.df) + for transform in reversed(transforms): + tslogger.log(f"Inverse transform {repr(transform)} is applied to dataset") + transform.inverse_transform(self) @property def segments(self) -> List[str]: From 1b6b8c8585a5ff698f52d2c3c8be55623740ee44 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 08:03:04 +0300 Subject: [PATCH 02/13] Remove inverce_transform from models --- etna/models/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/etna/models/base.py b/etna/models/base.py index 33e631181..d42c0aef6 100644 --- a/etna/models/base.py +++ b/etna/models/base.py @@ -320,7 +320,6 @@ def forecast(self, ts: TSDataset) -> TSDataset: df = TSDataset.to_dataset(df) ts.df = df - ts.inverse_transform() return ts @@ -373,7 +372,6 @@ def forecast( df = TSDataset.to_dataset(df) ts.df = df - ts.inverse_transform() return ts @@ -429,7 +427,6 @@ def forecast(self, ts: TSDataset) -> TSDataset: x = ts.to_pandas(flatten=True).drop(["segment"], axis=1) y = self._base_model.predict(x).reshape(-1, horizon).T ts.loc[:, pd.IndexSlice[:, "target"]] = y - ts.inverse_transform() return ts def get_model(self) -> Any: @@ -805,8 +802,6 @@ def forecast(self, ts: "TSDataset", horizon: int) -> "TSDataset": for (segment, feature_nm), value in predictions.items(): future_ts.df.loc[:, pd.IndexSlice[segment, feature_nm]] = value[:horizon, :] - future_ts.inverse_transform() - return future_ts def get_model(self) -> "DeepBaseNet": From 212fb5651885e475ebee993216202106c09643d9 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 08:09:07 +0300 Subject: [PATCH 03/13] Fix analysis --- etna/analysis/plotters.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/etna/analysis/plotters.py b/etna/analysis/plotters.py index 9a0bd72eb..fff4ddf5c 100644 --- a/etna/analysis/plotters.py +++ b/etna/analysis/plotters.py @@ -31,7 +31,7 @@ from etna.analysis.feature_selection import AGGREGATION_FN from etna.analysis.feature_selection import AggregationMode from etna.analysis.utils import prepare_axes -from etna.transforms import Transform +from etna.transforms import NewTransform if TYPE_CHECKING: from etna.datasets import TSDataset @@ -983,7 +983,6 @@ def get_residuals(forecast_df: pd.DataFrame, ts: "TSDataset") -> "TSDataset": new_ts = TSDataset(df=true_df, freq=ts.freq) new_ts.known_future = ts.known_future new_ts._regressors = ts.regressors - new_ts.transforms = ts.transforms new_ts.df_exog = ts.df_exog return new_ts @@ -992,7 +991,7 @@ def plot_residuals( forecast_df: pd.DataFrame, ts: "TSDataset", feature: Union[str, Literal["timestamp"]] = "timestamp", - transforms: Sequence[Transform] = (), + transforms: Sequence[NewTransform] = (), segments: Optional[List[str]] = None, columns_num: int = 2, figsize: Tuple[int, int] = (10, 5), From 4c9878b8c5b6f33e6235a275d32881577979196a Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 08:17:38 +0300 Subject: [PATCH 04/13] Fix pipelines --- etna/pipeline/assembling_pipelines.py | 6 +++--- etna/pipeline/autoregressive_pipeline.py | 12 +++++------- etna/pipeline/pipeline.py | 13 ++++++++----- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/etna/pipeline/assembling_pipelines.py b/etna/pipeline/assembling_pipelines.py index 7af4d95b0..d96f45f78 100644 --- a/etna/pipeline/assembling_pipelines.py +++ b/etna/pipeline/assembling_pipelines.py @@ -7,12 +7,12 @@ from etna.models.base import BaseModel from etna.pipeline.pipeline import Pipeline -from etna.transforms import Transform +from etna.transforms import NewTransform def assemble_pipelines( models: Union[BaseModel, Sequence[BaseModel]], - transforms: Sequence[Union[Transform, 
Sequence[Optional[Transform]]]], + transforms: Sequence[Union[NewTransform, Sequence[Optional[NewTransform]]]], horizons: Union[int, Sequence[int]], ) -> List[Pipeline]: """Create pipelines with broadcasting from models, transforms and horizons. @@ -93,7 +93,7 @@ def assemble_pipelines( for transform in transforms: if isinstance(transform, Sequence) and transform[i] is not None: transfoms_pipelines[-1].append(transform[i]) - elif isinstance(transform, Transform) and transform is not None: + elif isinstance(transform, NewTransform) and transform is not None: transfoms_pipelines[-1].append(transform) return [ diff --git a/etna/pipeline/autoregressive_pipeline.py b/etna/pipeline/autoregressive_pipeline.py index cd27959d8..04fe5ee44 100644 --- a/etna/pipeline/autoregressive_pipeline.py +++ b/etna/pipeline/autoregressive_pipeline.py @@ -6,7 +6,7 @@ from etna.datasets import TSDataset from etna.models.base import BaseModel from etna.pipeline.base import BasePipeline -from etna.transforms import Transform +from etna.transforms import NewTransform class AutoRegressivePipeline(BasePipeline): @@ -51,7 +51,7 @@ class AutoRegressivePipeline(BasePipeline): 2020-04-16 8.00 6.00 2.00 0.00 """ - def __init__(self, model: BaseModel, horizon: int, transforms: Sequence[Transform] = (), step: int = 1): + def __init__(self, model: BaseModel, horizon: int, transforms: Sequence[NewTransform] = (), step: int = 1): """ Create instance of AutoRegressivePipeline with given parameters. 
@@ -89,7 +89,7 @@ def fit(self, ts: TSDataset) -> "AutoRegressivePipeline": self.ts = ts ts.fit_transform(self.transforms) self.model.fit(ts) - self.ts.inverse_transform() + self.ts.inverse_transform(self.transforms) return self def _create_predictions_template(self) -> pd.DataFrame: @@ -121,8 +121,6 @@ def _forecast(self) -> TSDataset: df_exog=self.ts.df_exog, known_future=self.ts.known_future, ) - # manually set transforms in current_ts, otherwise make_future won't know about them - current_ts.transforms = self.transforms with warnings.catch_warnings(): warnings.filterwarnings( message="TSDataset freq can't be inferred", @@ -132,7 +130,7 @@ def _forecast(self) -> TSDataset: message="You probably set wrong freq.", action="ignore", ) - current_ts_forecast = current_ts.make_future(current_step) + current_ts_forecast = current_ts.make_future(current_step, transforms=self.transforms) current_ts_future = self.model.forecast(current_ts_forecast) prediction_df = prediction_df.combine_first(current_ts_future.to_pandas()[prediction_df.columns]) @@ -141,7 +139,7 @@ def _forecast(self) -> TSDataset: df=prediction_df, freq=self.ts.freq, df_exog=self.ts.df_exog, known_future=self.ts.known_future ) prediction_ts.transform(self.transforms) - prediction_ts.inverse_transform() + prediction_ts.inverse_transform(self.transforms) # cut only last timestamps from result dataset prediction_ts.df = prediction_ts.df.tail(self.horizon) prediction_ts.raw_df = prediction_ts.raw_df.tail(self.horizon) diff --git a/etna/pipeline/pipeline.py b/etna/pipeline/pipeline.py index bb22bd5e4..f48f6bf50 100644 --- a/etna/pipeline/pipeline.py +++ b/etna/pipeline/pipeline.py @@ -5,13 +5,13 @@ from etna.models.base import DeepBaseModel from etna.models.base import PredictIntervalAbstractModel from etna.pipeline.base import BasePipeline -from etna.transforms.base import Transform +from etna.transforms.base import NewTransform class Pipeline(BasePipeline): """Pipeline of transforms with a final estimator.""" 
- def __init__(self, model: BaseModel, transforms: Sequence[Transform] = (), horizon: int = 1): + def __init__(self, model: BaseModel, transforms: Sequence[NewTransform] = (), horizon: int = 1): """ Create instance of Pipeline with given parameters. @@ -46,7 +46,7 @@ def fit(self, ts: TSDataset) -> "Pipeline": self.ts = ts self.ts.fit_transform(self.transforms) self.model.fit(self.ts) - self.ts.inverse_transform() + self.ts.inverse_transform(self.transforms) return self def _forecast(self) -> TSDataset: @@ -55,7 +55,9 @@ def _forecast(self) -> TSDataset: raise ValueError("Something went wrong, ts is None!") if isinstance(self.model, DeepBaseModel): - future = self.ts.make_future(future_steps=self.model.decoder_length, tail_steps=self.model.encoder_length) + future = self.ts.make_future( + future_steps=self.model.decoder_length, transforms=self.transforms, tail_steps=self.model.encoder_length + ) predictions = self.model.forecast(ts=future, horizon=self.horizon) else: future = self.ts.make_future(self.horizon) @@ -90,10 +92,11 @@ def forecast( self._validate_backtest_n_folds(n_folds=n_folds) if prediction_interval and isinstance(self.model, PredictIntervalAbstractModel): - future = self.ts.make_future(self.horizon) + future = self.ts.make_future(self.horizon, transforms=self.transforms) predictions = self.model.forecast(ts=future, prediction_interval=prediction_interval, quantiles=quantiles) else: predictions = super().forecast( prediction_interval=prediction_interval, quantiles=quantiles, n_folds=n_folds ) + predictions.inverse_transform(self.transforms) return predictions From f36df229eb5c6f735925ce626a46d1ab8c8245ad Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 08:29:57 +0300 Subject: [PATCH 05/13] Remove old Transform --- etna/analysis/plotters.py | 4 +- etna/datasets/tsdataset.py | 12 +-- etna/models/nn/deepar.py | 1 - etna/models/nn/tft.py | 1 - etna/pipeline/assembling_pipelines.py | 6 +- etna/pipeline/autoregressive_pipeline.py | 
4 +- etna/pipeline/pipeline.py | 6 +- etna/transforms/__init__.py | 2 - etna/transforms/base.py | 126 ++--------------------- 9 files changed, 22 insertions(+), 140 deletions(-) diff --git a/etna/analysis/plotters.py b/etna/analysis/plotters.py index fff4ddf5c..c4699fc4b 100644 --- a/etna/analysis/plotters.py +++ b/etna/analysis/plotters.py @@ -31,7 +31,7 @@ from etna.analysis.feature_selection import AGGREGATION_FN from etna.analysis.feature_selection import AggregationMode from etna.analysis.utils import prepare_axes -from etna.transforms import NewTransform +from etna.transforms import Transform if TYPE_CHECKING: from etna.datasets import TSDataset @@ -991,7 +991,7 @@ def plot_residuals( forecast_df: pd.DataFrame, ts: "TSDataset", feature: Union[str, Literal["timestamp"]] = "timestamp", - transforms: Sequence[NewTransform] = (), + transforms: Sequence[Transform] = (), segments: Optional[List[str]] = None, columns_num: int = 2, figsize: Tuple[int, int] = (10, 5), diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index ce8c7b6b0..4bfb6c162 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -24,7 +24,7 @@ from etna.loggers import tslogger if TYPE_CHECKING: - from etna.transforms.base import NewTransform + from etna.transforms.base import Transform if SETTINGS.torch_required: from torch.utils.data import Dataset @@ -136,14 +136,14 @@ def __init__( self.df_exog.index = pd.to_datetime(self.df_exog.index) self.df = self._merge_exog(self.df) - def transform(self, transforms: Sequence["NewTransform"]): + def transform(self, transforms: Sequence["Transform"]): """Apply given transform to the data.""" self._check_endings(warning=True) for transform in transforms: tslogger.log(f"Transform {repr(transform)} is applied to dataset") transform.transform(self) - def fit_transform(self, transforms: Sequence["NewTransform"]): + def fit_transform(self, transforms: Sequence["Transform"]): """Fit and apply given transforms to the data.""" 
self._check_endings(warning=True) for transform in transforms: @@ -178,9 +178,7 @@ def __getitem__(self, item): df = df.loc[first_valid_idx:] return df - def make_future( - self, future_steps: int, transforms: Sequence["NewTransform"] = (), tail_steps: int = 0 - ) -> "TSDataset": + def make_future(self, future_steps: int, transforms: Sequence["Transform"] = (), tail_steps: int = 0) -> "TSDataset": """Return new TSDataset with future steps. Parameters @@ -363,7 +361,7 @@ def _check_endings(self, warning=False): else: raise ValueError("All segments should end at the same timestamp") - def inverse_transform(self, transforms: Sequence["NewTransform"]): + def inverse_transform(self, transforms: Sequence["Transform"]): """Apply inverse transform method of transforms to the data. Applied in reversed order. diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py index e7087bd60..200a1dd78 100644 --- a/etna/models/nn/deepar.py +++ b/etna/models/nn/deepar.py @@ -236,5 +236,4 @@ def forecast( df = df.sort_index(axis=1) ts.df = df - ts.inverse_transform() return ts diff --git a/etna/models/nn/tft.py b/etna/models/nn/tft.py index 878607b7b..b072581d0 100644 --- a/etna/models/nn/tft.py +++ b/etna/models/nn/tft.py @@ -268,5 +268,4 @@ def forecast( df = df.sort_index(axis=1) ts.df = df - ts.inverse_transform() return ts diff --git a/etna/pipeline/assembling_pipelines.py b/etna/pipeline/assembling_pipelines.py index d96f45f78..7af4d95b0 100644 --- a/etna/pipeline/assembling_pipelines.py +++ b/etna/pipeline/assembling_pipelines.py @@ -7,12 +7,12 @@ from etna.models.base import BaseModel from etna.pipeline.pipeline import Pipeline -from etna.transforms import NewTransform +from etna.transforms import Transform def assemble_pipelines( models: Union[BaseModel, Sequence[BaseModel]], - transforms: Sequence[Union[NewTransform, Sequence[Optional[NewTransform]]]], + transforms: Sequence[Union[Transform, Sequence[Optional[Transform]]]], horizons: Union[int, Sequence[int]], ) -> 
List[Pipeline]: """Create pipelines with broadcasting from models, transforms and horizons. @@ -93,7 +93,7 @@ def assemble_pipelines( for transform in transforms: if isinstance(transform, Sequence) and transform[i] is not None: transfoms_pipelines[-1].append(transform[i]) - elif isinstance(transform, NewTransform) and transform is not None: + elif isinstance(transform, Transform) and transform is not None: transfoms_pipelines[-1].append(transform) return [ diff --git a/etna/pipeline/autoregressive_pipeline.py b/etna/pipeline/autoregressive_pipeline.py index 04fe5ee44..696a3ccac 100644 --- a/etna/pipeline/autoregressive_pipeline.py +++ b/etna/pipeline/autoregressive_pipeline.py @@ -6,7 +6,7 @@ from etna.datasets import TSDataset from etna.models.base import BaseModel from etna.pipeline.base import BasePipeline -from etna.transforms import NewTransform +from etna.transforms import Transform class AutoRegressivePipeline(BasePipeline): @@ -51,7 +51,7 @@ class AutoRegressivePipeline(BasePipeline): 2020-04-16 8.00 6.00 2.00 0.00 """ - def __init__(self, model: BaseModel, horizon: int, transforms: Sequence[NewTransform] = (), step: int = 1): + def __init__(self, model: BaseModel, horizon: int, transforms: Sequence[Transform] = (), step: int = 1): """ Create instance of AutoRegressivePipeline with given parameters. 
diff --git a/etna/pipeline/pipeline.py b/etna/pipeline/pipeline.py index f48f6bf50..513bdace6 100644 --- a/etna/pipeline/pipeline.py +++ b/etna/pipeline/pipeline.py @@ -5,13 +5,13 @@ from etna.models.base import DeepBaseModel from etna.models.base import PredictIntervalAbstractModel from etna.pipeline.base import BasePipeline -from etna.transforms.base import NewTransform +from etna.transforms.base import Transform class Pipeline(BasePipeline): """Pipeline of transforms with a final estimator.""" - def __init__(self, model: BaseModel, transforms: Sequence[NewTransform] = (), horizon: int = 1): + def __init__(self, model: BaseModel, transforms: Sequence[Transform] = (), horizon: int = 1): """ Create instance of Pipeline with given parameters. @@ -60,7 +60,7 @@ def _forecast(self) -> TSDataset: ) predictions = self.model.forecast(ts=future, horizon=self.horizon) else: - future = self.ts.make_future(self.horizon) + future = self.ts.make_future(self.horizon, transforms=self.transforms) predictions = self.model.forecast(ts=future) return predictions diff --git a/etna/transforms/__init__.py b/etna/transforms/__init__.py index a46f37d90..8ea28c046 100644 --- a/etna/transforms/__init__.py +++ b/etna/transforms/__init__.py @@ -1,7 +1,5 @@ from etna.transforms.base import IrreversiblePerSegmentWrapper from etna.transforms.base import IrreversibleTransform -from etna.transforms.base import NewPerSegmentWrapper -from etna.transforms.base import NewTransform from etna.transforms.base import OneSegmentTransform from etna.transforms.base import PerSegmentWrapper from etna.transforms.base import ReversiblePerSegmentWrapper diff --git a/etna/transforms/base.py b/etna/transforms/base.py index a833529a5..2060276fa 100644 --- a/etna/transforms/base.py +++ b/etna/transforms/base.py @@ -18,7 +18,7 @@ class FutureMixin: """Mixin for transforms that can convert non-regressor column to a regressor one.""" -class NewTransform(ABC, BaseMixin): +class Transform(ABC, BaseMixin): """Base class 
to create any transforms to apply to data.""" def __init__(self, required_features: Union[Literal["all"], List[str]]): @@ -70,7 +70,7 @@ def _fit(self, df: pd.DataFrame): """ pass - def fit(self, ts: TSDataset) -> "NewTransform": + def fit(self, ts: TSDataset) -> "Transform": """Fit the transform. Parameters @@ -160,7 +160,7 @@ def inverse_transform(self, ts: TSDataset) -> TSDataset: pass -class IrreversibleTransform(NewTransform): +class IrreversibleTransform(Transform): """Base class to create irreversible transforms.""" def __init__(self, required_features: Union[Literal["all"], List[str]]): @@ -184,7 +184,7 @@ def inverse_transform(self, ts: TSDataset) -> TSDataset: return ts -class ReversibleTransform(NewTransform): +class ReversibleTransform(Transform): """Base class to create reversible transforms.""" def __init__(self, required_features: Union[Literal["all"], List[str]]): @@ -230,118 +230,6 @@ def inverse_transform(self, ts: TSDataset) -> TSDataset: return ts -class Transform(ABC, BaseMixin): - """Base class to create any transforms to apply to data.""" - - @abstractmethod - def fit(self, df: pd.DataFrame) -> "Transform": - """Fit feature model. - - Should be implemented by user. - - Parameters - ---------- - df - - Returns - ------- - : - """ - pass - - @abstractmethod - def transform(self, df: pd.DataFrame) -> pd.DataFrame: - """Transform dataframe. - - Should be implemented by user - - Parameters - ---------- - df - - Returns - ------- - : - """ - pass - - def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame: - """ - May be reimplemented. But it is not recommended. - - Parameters - ---------- - df - - Returns - ------- - : - """ - return self.fit(df).transform(df) - - def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: - """Inverse transforms dataframe. 
- - Parameters - ---------- - df - - Returns - ------- - : - """ - return df - - -class PerSegmentWrapper(Transform): - """Class to apply transform in per segment manner.""" - - def __init__(self, transform): - self._base_transform = transform - self.segment_transforms = {} - self.segments = None - - def fit(self, df: pd.DataFrame) -> "PerSegmentWrapper": - """Fit transform on each segment.""" - self.segments = df.columns.get_level_values(0).unique() - for segment in self.segments: - self.segment_transforms[segment] = deepcopy(self._base_transform) - self.segment_transforms[segment].fit(df[segment]) - return self - - def transform(self, df: pd.DataFrame) -> pd.DataFrame: - """Apply transform to each segment separately.""" - results = [] - for key, value in self.segment_transforms.items(): - seg_df = value.transform(df[key]) - - _idx = seg_df.columns.to_frame() - _idx.insert(0, "segment", key) - seg_df.columns = pd.MultiIndex.from_frame(_idx) - - results.append(seg_df) - df = pd.concat(results, axis=1) - df = df.sort_index(axis=1) - df.columns.names = ["segment", "feature"] - return df - - def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: - """Apply inverse_transform to each segment.""" - results = [] - for key, value in self.segment_transforms.items(): - seg_df = value.inverse_transform(df[key]) - - _idx = seg_df.columns.to_frame() - _idx.insert(0, "segment", key) - seg_df.columns = pd.MultiIndex.from_frame(_idx) - - results.append(seg_df) - df = pd.concat(results, axis=1) - df = df.sort_index(axis=1) - df.columns.names = ["segment", "feature"] - return df - - class OneSegmentTransform(ABC, BaseMixin): """Base class to create one segment transforms to apply to data.""" @@ -412,7 +300,7 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: pass -class NewPerSegmentWrapper(NewTransform): +class PerSegmentWrapper(Transform): """Class to apply transform in per segment manner.""" def __init__(self, transform: OneSegmentTransform, 
required_features: Union[Literal["all"], List[str]]): @@ -449,14 +337,14 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame: return df -class IrreversiblePerSegmentWrapper(NewPerSegmentWrapper, IrreversibleTransform): +class IrreversiblePerSegmentWrapper(PerSegmentWrapper, IrreversibleTransform): """Class to apply irreversible transform in per segment manner.""" def __init__(self, transform: OneSegmentTransform, required_features: Union[Literal["all"], List[str]]): super().__init__(transform=transform, required_features=required_features) -class ReversiblePerSegmentWrapper(NewPerSegmentWrapper, ReversibleTransform): +class ReversiblePerSegmentWrapper(PerSegmentWrapper, ReversibleTransform): """Class to apply reversible transform in per segment manner.""" def __init__(self, transform: OneSegmentTransform, required_features: Union[Literal["all"], List[str]]): From 993dd6d07406798932af51717c3c169c766b4756 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 08:37:26 +0300 Subject: [PATCH 06/13] Fix dataset tests --- tests/test_datasets/test_dataset.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 1ef3c71dc..ca26ac78f 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -459,12 +459,11 @@ def test_make_future_raise_error_on_diff_endings(ts_diff_endings): ts_diff_endings.make_future(10) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_make_future_with_imputer(ts_diff_endings, ts_future): imputer = TimeSeriesImputerTransform(in_column="target") ts_diff_endings.fit_transform([imputer]) - future = ts_diff_endings.make_future(10) - assert_frame_equal(future.df, ts_future.df) + future = ts_diff_endings.make_future(10, transforms=[imputer]) + assert_frame_equal(future.to_pandas(), ts_future.to_pandas()) def test_make_future(): @@ -834,7 +833,6 @@ def test_tsdataset_idx_slice(tsdf_with_exog, 
start_idx, end_idx): ts_slice = tsdf_with_exog.tsdataset_idx_slice(start_idx=start_idx, end_idx=end_idx) assert ts_slice.known_future == tsdf_with_exog.known_future assert ts_slice.regressors == tsdf_with_exog.regressors - assert ts_slice.transforms == tsdf_with_exog.transforms pd.testing.assert_frame_equal(ts_slice.df, tsdf_with_exog.df.iloc[start_idx:end_idx]) pd.testing.assert_frame_equal(ts_slice.df_exog, tsdf_with_exog.df_exog) From 80a520ecfe46084fe0a31bd310fbf46c4e14144d Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 09:58:47 +0300 Subject: [PATCH 07/13] Fix some xfails --- tests/test_analysis/test_plotters.py | 3 --- tests/test_datasets/test_dataset.py | 13 ------------ tests/test_ensembles/test_voting_ensemble.py | 3 --- tests/test_models/nn/test_mlp.py | 7 ++++--- tests/test_models/test_base.py | 8 ------- tests/test_models/test_catboost.py | 17 +++++++-------- tests/test_models/test_linear_model.py | 16 ++++++-------- tests/test_models/test_tbats.py | 4 ++-- .../test_assembling_pipelines.py | 2 -- .../test_autoregressive_pipeline.py | 10 ++------- tests/test_pipeline/test_pipeline.py | 21 +++++-------------- ...st_change_points_segmentation_transform.py | 4 +--- .../test_decomposition/test_stl_transform.py | 4 ++-- .../test_transform_quantiles.py | 2 +- .../test_categorical_transform.py | 7 ++++--- .../test_mean_segment_encoder_transform.py | 4 ++-- .../test_feature_importance_transform.py | 1 - .../test_math/test_lambda_transform.py | 4 ++-- .../test_math/test_log_transform.py | 1 - .../test_impute_transform.py | 9 ++++---- .../test_resample_transform.py | 3 +-- .../test_outliers/test_outliers_transform.py | 7 +++---- .../test_timestamp/test_fourier_transform.py | 7 ++++--- 23 files changed, 51 insertions(+), 106 deletions(-) diff --git a/tests/test_analysis/test_plotters.py b/tests/test_analysis/test_plotters.py index eec5f3ba2..19f758cca 100644 --- a/tests/test_analysis/test_plotters.py +++ 
b/tests/test_analysis/test_plotters.py @@ -71,7 +71,6 @@ def test_get_residuals_not_matching_segments(residuals): _ = get_residuals(forecast_df=forecast_df, ts=ts) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_plot_residuals_fails_unkown_feature(example_tsdf): """Test that plot_residuals fails if meet unknown feature.""" pipeline = Pipeline( @@ -82,7 +81,6 @@ def test_plot_residuals_fails_unkown_feature(example_tsdf): plot_residuals(forecast_df=forecast_df, ts=example_tsdf, feature="unkown_feature") -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") @pytest.mark.parametrize( "poly_degree, trend_transform_class", ( @@ -101,7 +99,6 @@ def test_plot_bin_seg(example_tsdf, detrend_model): plot_trend(ts=example_tsdf, trend_transform=BinsegTrendTransform(in_column="target", detrend_model=detrend_model)) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") @pytest.mark.parametrize("period", (7, 30)) def test_plot_stl(example_tsdf, period): plot_trend(ts=example_tsdf, trend_transform=STLTransform(in_column="target", period=period)) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index ca26ac78f..c96770574 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -807,19 +807,6 @@ def ts_with_regressors(df_and_regressors): return ts -def _test_update_regressors_transform(ts, transforms, expected_regressors): - fitted_transforms = [transform.fit(ts.df) for transform in transforms] - ts.transform(fitted_transforms) - regressors = ts.regressors - assert sorted(regressors) == sorted(expected_regressors) - - -def _test_update_regressors_fit_transform(ts, transforms, expected_regressors): - ts.fit_transform(transforms) - regressors = ts.regressors - assert sorted(regressors) == sorted(expected_regressors) - - def test_to_dataset_not_modify_dataframe(): timestamp = pd.date_range("2021-01-01", "2021-02-01") df_original = pd.DataFrame({"timestamp": timestamp, 
"target": 11, "segment": 1}) diff --git a/tests/test_ensembles/test_voting_ensemble.py b/tests/test_ensembles/test_voting_ensemble.py index 514139eff..c624821ca 100644 --- a/tests/test_ensembles/test_voting_ensemble.py +++ b/tests/test_ensembles/test_voting_ensemble.py @@ -77,7 +77,6 @@ def test_fit_interface( assert len(result) == 2 -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast_interface(example_tsds: TSDataset, catboost_pipeline: Pipeline, prophet_pipeline: Pipeline): """Check that VotingEnsemble.forecast returns TSDataset of correct length.""" ensemble = VotingEnsemble(pipelines=[catboost_pipeline, prophet_pipeline]) @@ -117,7 +116,6 @@ def test_forecast_prediction_interval_interface(example_tsds, naive_pipeline_1, @pytest.mark.long -@pytest.mark.xfail(reason="TSDataset 2.0") def test_multiprocessing_ensembles( simple_df: TSDataset, catboost_pipeline: Pipeline, @@ -139,7 +137,6 @@ def test_multiprocessing_ensembles( assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all() -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("n_jobs", (1, 5)) def test_backtest(voting_ensemble_pipeline: VotingEnsemble, example_tsds: TSDataset, n_jobs: int): """Check that backtest works with VotingEnsemble.""" diff --git a/tests/test_models/nn/test_mlp.py b/tests/test_models/nn/test_mlp.py index 0645e6b0a..700d01530 100644 --- a/tests/test_models/nn/test_mlp.py +++ b/tests/test_models/nn/test_mlp.py @@ -14,7 +14,6 @@ from etna.transforms import StandardScalerTransform -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("horizon", [8, 13]) def test_mlp_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon): @@ -22,7 +21,8 @@ def test_mlp_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_wit lag = LagTransform(in_column="target", lags=list(range(horizon, horizon + 4))) fourier = FourierTransform(period=7, order=3) std = StandardScalerTransform(in_column="target") - 
ts_train.fit_transform([std, lag, fourier]) + transforms = [std, lag, fourier] + ts_train.fit_transform(transforms) decoder_length = 14 model = MLPModel( @@ -32,9 +32,10 @@ def test_mlp_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_wit decoder_length=decoder_length, trainer_params=dict(max_epochs=100), ) - future = ts_train.make_future(decoder_length) + future = ts_train.make_future(decoder_length, transforms=transforms) model.fit(ts_train) future = model.forecast(future, horizon=horizon) + future.inverse_transform(transforms) mae = MAE("macro") assert mae(ts_test, future) < 0.05 diff --git a/tests/test_models/test_base.py b/tests/test_models/test_base.py index 45dc2e451..17c1df808 100644 --- a/tests/test_models/test_base.py +++ b/tests/test_models/test_base.py @@ -138,13 +138,6 @@ def test_deep_base_model_raw_predict_call(dataloader, deep_base_model_mock): np.testing.assert_allclose(predictions_dict[("segment2", "target")], batch["target"][1].numpy()) -def test_deep_base_model_forecast_inverse_transform_call_check(deep_base_model_mock): - ts = MagicMock() - horizon = 7 - DeepBaseModel.forecast(self=deep_base_model_mock, ts=ts, horizon=horizon) - ts.tsdataset_idx_slice.return_value.inverse_transform.assert_called_once() - - def test_deep_base_model_forecast_loop(simple_df, deep_base_model_mock): ts = MagicMock() ts_after_tsdataset_idx_slice = MagicMock() @@ -163,4 +156,3 @@ def test_deep_base_model_forecast_loop(simple_df, deep_base_model_mock): np.testing.assert_allclose( future.df.loc[:, pd.IndexSlice["B", "target"]], raw_predict[("B", "target")][:horizon, 0] ) - ts.tsdataset_idx_slice.return_value.inverse_transform.assert_called_once() diff --git a/tests/test_models/test_catboost.py b/tests/test_models/test_catboost.py index 90501d7f4..4b71aef46 100644 --- a/tests/test_models/test_catboost.py +++ b/tests/test_models/test_catboost.py @@ -10,7 +10,6 @@ from etna.transforms.math import LagTransform -@pytest.mark.xfail(reason="TSDataset 2.0") 
@pytest.mark.parametrize("catboostmodel", [CatBoostMultiSegmentModel, CatBoostPerSegmentModel]) def test_run(catboostmodel, new_format_df): df = new_format_df @@ -22,15 +21,15 @@ def test_run(catboostmodel, new_format_df): model = catboostmodel() model.fit(ts) - future_ts = ts.make_future(3) + future_ts = ts.make_future(3, transforms=[lags]) model.forecast(future_ts) + future_ts.inverse_transform([lags]) if not future_ts.isnull().values.any(): assert True else: assert False -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("catboostmodel", [CatBoostMultiSegmentModel, CatBoostPerSegmentModel]) def test_run_with_reg(catboostmodel, new_format_df, new_format_exog): df = new_format_df @@ -41,13 +40,14 @@ def test_run_with_reg(catboostmodel, new_format_df, new_format_exog): lags = LagTransform(lags=[3, 4, 5], in_column="target") lags_exog = LagTransform(lags=[3, 4, 5, 6], in_column="regressor_exog") - - ts.fit_transform([lags, lags_exog]) + transforms = [lags, lags_exog] + ts.fit_transform(transforms) model = catboostmodel() model.fit(ts) - future_ts = ts.make_future(3) + future_ts = ts.make_future(3, transforms=transforms) model.forecast(future_ts) + future_ts.inverse_transform(transforms) if not future_ts.isnull().values.any(): assert True else: @@ -71,18 +71,18 @@ def constant_ts(size=40) -> TSDataset: return train, test -@pytest.mark.xfail(reason="TSDataset 2.0") def test_catboost_multi_segment_forecast(constant_ts): train, test = constant_ts horizon = len(test.df) lags = LagTransform(in_column="target", lags=[10, 11, 12]) train.fit_transform([lags]) - future = train.make_future(horizon) + future = train.make_future(horizon, transforms=[lags]) model = CatBoostMultiSegmentModel() model.fit(train) forecast = model.forecast(future) + forecast.inverse_transform([lags]) for segment in forecast.segments: assert np.allclose(test[:, segment, "target"], forecast[:, segment, "target"]) @@ -100,7 +100,6 @@ def test_get_model_per_segment_before_training(): _ = 
etna_model.get_model() -@pytest.mark.xfail(reason="TSDataset 2.0") def test_get_model_per_segment_after_training(example_tsds): pipeline = Pipeline(model=CatBoostPerSegmentModel(), transforms=[LagTransform(in_column="target", lags=[2, 3])]) pipeline.fit(ts=example_tsds) diff --git a/tests/test_models/test_linear_model.py b/tests/test_models/test_linear_model.py index ad471d7f3..2477bbd10 100644 --- a/tests/test_models/test_linear_model.py +++ b/tests/test_models/test_linear_model.py @@ -81,7 +81,6 @@ def linear_segments_ts_common(random_seed): return linear_segments_by_parameters(alpha_values, intercept_values) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", (LinearPerSegmentModel(), ElasticPerSegmentModel())) def test_not_fitted(model, linear_segments_ts_unique): """Check exception when trying to forecast with unfitted model.""" @@ -89,7 +88,7 @@ def test_not_fitted(model, linear_segments_ts_unique): lags = LagTransform(in_column="target", lags=[3, 4, 5]) train.fit_transform([lags]) - to_forecast = train.make_future(3) + to_forecast = train.make_future(3, transforms=[lags]) with pytest.raises(ValueError, match="model is not fitted!"): model.forecast(to_forecast) @@ -122,7 +121,6 @@ def test_repr_elastic(model_class, model_class_repr): assert model_repr == true_repr -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", [LinearPerSegmentModel(), ElasticPerSegmentModel()]) @pytest.mark.parametrize("num_lags", [3, 5, 10, 20, 30]) def test_model_per_segment(linear_segments_ts_unique, num_lags, model): @@ -139,14 +137,14 @@ def test_model_per_segment(linear_segments_ts_unique, num_lags, model): model.fit(train) - to_forecast = train.make_future(horizon) + to_forecast = train.make_future(horizon, transforms=[lags]) res = model.forecast(to_forecast) + res.inverse_transform([lags]) for segment in res.segments: assert np.allclose(test[:, segment, "target"], res[:, segment, "target"], atol=1) 
-@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", [LinearMultiSegmentModel(), ElasticMultiSegmentModel()]) @pytest.mark.parametrize("num_lags", [3, 5, 10, 20, 30]) def test_model_multi_segment(linear_segments_ts_common, num_lags, model): @@ -163,14 +161,14 @@ def test_model_multi_segment(linear_segments_ts_common, num_lags, model): model.fit(train) - to_forecast = train.make_future(horizon) + to_forecast = train.make_future(horizon, transforms=[lags]) res = model.forecast(to_forecast) + res.inverse_transform([lags]) for segment in res.segments: assert np.allclose(test[:, segment, "target"], res[:, segment, "target"], atol=1) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", [LinearPerSegmentModel()]) def test_no_warning_on_categorical_features(example_tsds, model): """Check that SklearnModel raises no warning working with dataset with categorical features""" @@ -195,7 +193,7 @@ def test_no_warning_on_categorical_features(example_tsds, model): == 0 ) - to_forecast = example_tsds.make_future(horizon) + to_forecast = example_tsds.make_future(horizon, transforms=[lags, dateflags]) with pytest.warns(None) as record: _ = model.forecast(to_forecast) assert ( @@ -212,7 +210,6 @@ def test_no_warning_on_categorical_features(example_tsds, model): ) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", [LinearPerSegmentModel()]) def test_raise_error_on_unconvertable_features(ts_with_categoricals, model): """Check that SklearnModel raises error working with dataset with categorical features which can't be converted to numeric""" @@ -247,7 +244,6 @@ def test_get_model_per_segment_before_training(): _ = etna_model.get_model() -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "etna_class,expected_model_class", ( diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index 44ac805c4..75150f50e 100644 --- a/tests/test_models/test_tbats.py +++ 
b/tests/test_models/test_tbats.py @@ -84,7 +84,6 @@ def test_not_fitted(model, linear_segments_ts_unique): model.forecast(to_forecast) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) def test_format(model, new_format_df): df = new_format_df @@ -92,8 +91,9 @@ def test_format(model, new_format_df): lags = LagTransform(lags=[3, 4, 5], in_column="target") ts.fit_transform([lags]) model.fit(ts) - future_ts = ts.make_future(3) + future_ts = ts.make_future(3, transforms=[lags]) model.forecast(future_ts) + future_ts.inverse_transform([lags]) assert not future_ts.isnull().values.any() diff --git a/tests/test_pipeline/test_assembling_pipelines.py b/tests/test_pipeline/test_assembling_pipelines.py index 85eff8962..3d0948071 100644 --- a/tests/test_pipeline/test_assembling_pipelines.py +++ b/tests/test_pipeline/test_assembling_pipelines.py @@ -99,7 +99,6 @@ def test_output_pipelines(models, transforms, horizons, expected_len): assert isinstance(pipeline, Pipeline) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "models, transforms, horizons, expected_transforms_lens", [ @@ -141,7 +140,6 @@ def test_different_objects(): assert len({id(pipeline.horizon) for pipeline in pipelines}) == len(pipelines) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "models, transforms, horizons, expected_len", [ diff --git a/tests/test_pipeline/test_autoregressive_pipeline.py b/tests/test_pipeline/test_autoregressive_pipeline.py index 32223058f..bf80b9183 100644 --- a/tests/test_pipeline/test_autoregressive_pipeline.py +++ b/tests/test_pipeline/test_autoregressive_pipeline.py @@ -18,7 +18,6 @@ DEFAULT_METRICS = [MAE(mode=MetricAggregationMode.per_segment)] -@pytest.mark.xfail(reason="TSDataset 2.0") def test_fit(example_tsds): """Test that AutoRegressivePipeline pipeline makes fit without failing.""" model = LinearPerSegmentModel() @@ -27,7 +26,6 @@ def test_fit(example_tsds): 
pipeline.fit(example_tsds) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast_columns(example_reg_tsds): """Test that AutoRegressivePipeline generates all the columns.""" original_ts = deepcopy(example_reg_tsds) @@ -54,7 +52,6 @@ def test_forecast_columns(example_reg_tsds): ) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast_one_step(example_tsds): """Test that AutoRegressivePipeline gets predictions one by one if step is equal to 1.""" original_ts = deepcopy(example_tsds) @@ -76,8 +73,9 @@ def test_forecast_one_step(example_tsds): cur_ts = TSDataset(df, freq=original_ts.freq) # these transform don't fit and we can fit_transform them at each step cur_ts.transform(transforms) - cur_forecast_ts = cur_ts.make_future(1) + cur_forecast_ts = cur_ts.make_future(1, transforms=transforms) cur_future_ts = model.forecast(cur_forecast_ts) + cur_future_ts.inverse_transform(transforms) to_add_df = cur_future_ts.to_pandas() df = pd.concat([df, to_add_df[df.columns]]) @@ -85,7 +83,6 @@ def test_forecast_one_step(example_tsds): assert np.all(forecast_pipeline[:, :, "target"] == forecast_manual[:, :, "target"]) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("horizon, step", ((1, 1), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (20, 1), (20, 2), (20, 3))) def test_forecast_multi_step(example_tsds, horizon, step): """Test that AutoRegressivePipeline gets correct number of predictions if step is more than 1.""" @@ -98,7 +95,6 @@ def test_forecast_multi_step(example_tsds, horizon, step): assert forecast_pipeline.df.shape[0] == horizon -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast_prediction_interval_interface(example_tsds): """Test the forecast interface with prediction intervals.""" pipeline = AutoRegressivePipeline( @@ -112,7 +108,6 @@ def test_forecast_prediction_interval_interface(example_tsds): assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all() -@pytest.mark.xfail(reason="TSDataset 2.0") def 
test_forecast_with_fit_transforms(example_tsds): """Test that AutoRegressivePipeline can work with transforms that need fitting.""" horizon = 5 @@ -132,7 +127,6 @@ def test_forecast_raise_error_if_not_fitted(): @pytest.mark.long -@pytest.mark.xfail(reason="TSDataset 2.0") def test_backtest_with_n_jobs(big_example_tsdf: TSDataset): """Check that AutoRegressivePipeline.backtest gives the same results in case of single and multiple jobs modes.""" # create a pipeline diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py index be4b9a54e..0016a099e 100644 --- a/tests/test_pipeline/test_pipeline.py +++ b/tests/test_pipeline/test_pipeline.py @@ -64,7 +64,6 @@ def test_init_fail(horizon): ) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_fit(example_tsds): """Test that Pipeline correctly transforms dataset on fit stage.""" original_ts = deepcopy(example_tsds) @@ -73,11 +72,10 @@ def test_fit(example_tsds): pipeline = Pipeline(model=model, transforms=transforms, horizon=5) pipeline.fit(example_tsds) original_ts.fit_transform(transforms) - original_ts.inverse_transform() + original_ts.inverse_transform(transforms) assert np.all(original_ts.df.values == pipeline.ts.df.values) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast(example_tsds): """Test that the forecast from the Pipeline is correct.""" original_ts = deepcopy(example_tsds) @@ -90,13 +88,13 @@ def test_forecast(example_tsds): original_ts.fit_transform(transforms) model.fit(original_ts) - future = original_ts.make_future(5) + future = original_ts.make_future(5, transforms=transforms) forecast_manual = model.forecast(future) + forecast_manual.inverse_transform(transforms) assert np.all(forecast_pipeline.df.values == forecast_manual.df.values) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "quantiles,prediction_interval_cv,error_msg", ( @@ -130,7 +128,6 @@ def test_forecast_prediction_interval_builtin(example_tsds, model): assert 
forecast_model.df.equals(forecast_pipeline.df) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model", (MovingAverageModel(), LinearPerSegmentModel())) def test_forecast_prediction_interval_interface(example_tsds, model): """Test the forecast interface for the models without built-in prediction intervals.""" @@ -292,7 +289,6 @@ def test_generate_constant_timeranges_hours(): assert stage_df.index.max() == datetime.strptime(borders[1], "%Y-%m-%d %H:%M:%S").date() -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "aggregate_metrics,expected_columns", ( @@ -318,7 +314,6 @@ def test_get_metrics_interface( assert sorted(expected_columns) == sorted(metrics_df.columns) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_get_forecasts_interface_daily(catboost_pipeline: Pipeline, big_daily_example_tsdf: TSDataset): """Check that Pipeline.backtest returns forecasts in correct format.""" _, forecast, _ = catboost_pipeline.backtest(ts=big_daily_example_tsdf, metrics=DEFAULT_METRICS) @@ -328,7 +323,6 @@ def test_get_forecasts_interface_daily(catboost_pipeline: Pipeline, big_daily_ex assert expected_columns == sorted(set(forecast.columns.get_level_values("feature"))) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_get_forecasts_interface_hours(catboost_pipeline: Pipeline, example_tsdf: TSDataset): """Check that Pipeline.backtest returns forecasts in correct format with non-daily seasonality.""" _, forecast, _ = catboost_pipeline.backtest(ts=example_tsdf, metrics=DEFAULT_METRICS) @@ -338,7 +332,6 @@ def test_get_forecasts_interface_hours(catboost_pipeline: Pipeline, example_tsdf assert expected_columns == sorted(set(forecast.columns.get_level_values("feature"))) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_get_fold_info_interface_daily(catboost_pipeline: Pipeline, big_daily_example_tsdf: TSDataset): """Check that Pipeline.backtest returns info dataframe in correct format.""" _, _, info_df = 
catboost_pipeline.backtest(ts=big_daily_example_tsdf, metrics=DEFAULT_METRICS) @@ -346,7 +339,6 @@ def test_get_fold_info_interface_daily(catboost_pipeline: Pipeline, big_daily_ex assert expected_columns == sorted(info_df.columns) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_get_fold_info_interface_hours(catboost_pipeline: Pipeline, example_tsdf: TSDataset): """Check that Pipeline.backtest returns info dataframe in correct format with non-daily seasonality.""" _, _, info_df = catboost_pipeline.backtest(ts=example_tsdf, metrics=DEFAULT_METRICS) @@ -355,7 +347,6 @@ def test_get_fold_info_interface_hours(catboost_pipeline: Pipeline, example_tsdf @pytest.mark.long -@pytest.mark.xfail(reason="TSDataset 2.0") def test_backtest_with_n_jobs(catboost_pipeline: Pipeline, big_example_tsdf: TSDataset): """Check that Pipeline.backtest gives the same results in case of single and multiple jobs modes.""" ts1 = deepcopy(big_example_tsdf) @@ -384,12 +375,11 @@ def test_forecast_raise_error_if_not_fitted(): _ = pipeline.forecast() -@pytest.mark.xfail(reason="TSDataset 2.0") -def test_forecast_pipeline_with_nan_at_the_end(df_with_nans_in_tails): +def test_forecast_pipeline_with_nan_at_the_end(ts_with_nans_in_tails): """Test that Pipeline can forecast with datasets with nans at the end.""" pipeline = Pipeline(model=NaiveModel(), horizon=5) - pipeline.fit(TSDataset(df_with_nans_in_tails, freq="1H")) + pipeline.fit(ts_with_nans_in_tails) forecast = pipeline.forecast() assert len(forecast.df) == 5 @@ -541,7 +531,6 @@ def test_sanity_backtest_naive_with_intervals(weekly_period_ts): assert f"target_{quantiles[1]}" in features -@pytest.mark.xfail(reason="TSDataset 2.0") def test_backtest_pass_with_filter_transform(ts_with_feature): ts = ts_with_feature diff --git a/tests/test_transforms/test_decomposition/test_change_points_segmentation_transform.py b/tests/test_transforms/test_decomposition/test_change_points_segmentation_transform.py index c98ec7277..f1cc7bddb 100644 --- 
a/tests/test_transforms/test_decomposition/test_change_points_segmentation_transform.py +++ b/tests/test_transforms/test_decomposition/test_change_points_segmentation_transform.py @@ -87,7 +87,6 @@ def test_transform_raise_error_if_not_fitted(pre_transformed_df: pd.DataFrame): _ = transform.transform(df=pre_transformed_df["segment_1"]) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_backtest(simple_ar_ts): model = CatBoostModelPerSegment() horizon = 3 @@ -114,13 +113,12 @@ def test_future_and_past_filling(simple_ar_ts): assert (after.to_pandas()[seg][OUT_COLUMN].astype(int) == 5).all() -@pytest.mark.xfail(reason="TSDataset 2.0") def test_make_future(simple_ar_ts): change_point_model = RupturesChangePointsModel(change_point_model=Binseg(), n_bkps=N_BKPS) bs = ChangePointsSegmentationTransform( in_column="target", change_point_model=change_point_model, out_column=OUT_COLUMN ) simple_ar_ts.fit_transform(transforms=[bs]) - future = simple_ar_ts.make_future(10) + future = simple_ar_ts.make_future(10, transforms=[bs]) for seg in simple_ar_ts.segments: assert (future.to_pandas()[seg][OUT_COLUMN].astype(int) == 5).all() diff --git a/tests/test_transforms/test_decomposition/test_stl_transform.py b/tests/test_transforms/test_decomposition/test_stl_transform.py index e3ca94aef..847e18dfe 100644 --- a/tests/test_transforms/test_decomposition/test_stl_transform.py +++ b/tests/test_transforms/test_decomposition/test_stl_transform.py @@ -140,7 +140,6 @@ def test_inverse_transform_multi_segments(ts_name, model, request): assert df_inverse_transformed["target"].equals(df["target"]) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("model_stl", ["arima", "holt"]) def test_forecast(ts_trend_seasonal, model_stl): """Test that transform works correctly in forecast.""" @@ -154,8 +153,9 @@ def test_forecast(ts_trend_seasonal, model_stl): transform.fit_transform(ts_train) model = NaiveModel() model.fit(ts_train) - ts_future = ts_train.make_future(3) + ts_future = 
ts_train.make_future(3, transforms=[transform]) ts_forecast = model.forecast(ts_future) + ts_forecast.inverse_transform([transform]) for segment in ts_forecast.segments: np.testing.assert_allclose(ts_forecast[:, segment, "target"], ts_test[:, segment, "target"], atol=0.1) diff --git a/tests/test_transforms/test_decomposition/test_transform_quantiles.py b/tests/test_transforms/test_decomposition/test_transform_quantiles.py index e59553cf0..176ab46da 100644 --- a/tests/test_transforms/test_decomposition/test_transform_quantiles.py +++ b/tests/test_transforms/test_decomposition/test_transform_quantiles.py @@ -7,7 +7,7 @@ from etna.transforms import TheilSenTrendTransform -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transforms") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") @pytest.mark.parametrize( "transform", ( diff --git a/tests/test_transforms/test_encoders/test_categorical_transform.py b/tests/test_transforms/test_encoders/test_categorical_transform.py index 23dd29c27..3388184b6 100644 --- a/tests/test_transforms/test_encoders/test_categorical_transform.py +++ b/tests/test_transforms/test_encoders/test_categorical_transform.py @@ -254,18 +254,19 @@ def f(x): return ts -@pytest.mark.xfail(reason="TSDataset 2.0") def test_ohe_sanity(ts_for_ohe_sanity): """Test for correct work in the full forecasting pipeline.""" horizon = 10 train_ts, test_ts = ts_for_ohe_sanity.train_test_split(test_size=horizon) ohe = OneHotEncoderTransform(in_column="regressor_0") filt = FilterFeaturesTransform(exclude=["regressor_0"]) - train_ts.fit_transform([ohe, filt]) + transforms = [ohe, filt] + train_ts.fit_transform(transforms) model = LinearPerSegmentModel() model.fit(train_ts) - future_ts = train_ts.make_future(horizon) + future_ts = train_ts.make_future(horizon, transforms=transforms) forecast_ts = model.forecast(future_ts) + forecast_ts.inverse_transform(transforms) r2 = R2() assert 1 - r2(test_ts, forecast_ts)["segment_0"] < 
1e-5 diff --git a/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py b/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py index e4ff85243..a74823ef9 100644 --- a/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py +++ b/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py @@ -35,7 +35,6 @@ def almost_constant_ts(random_seed) -> TSDataset: return ts -@pytest.mark.xfail(reason="TSDataset 2.0") def test_mean_segment_encoder_forecast(almost_constant_ts): """Test that MeanSegmentEncoderTransform works correctly in forecast pipeline and helps to correctly forecast almost constant series.""" @@ -46,8 +45,9 @@ def test_mean_segment_encoder_forecast(almost_constant_ts): train, test = almost_constant_ts.train_test_split(test_size=horizon) train.fit_transform([encoder]) model.fit(train) - future = train.make_future(horizon) + future = train.make_future(horizon,transforms=[encoder]) pred_mean_segment_encoding = model.forecast(future) + pred_mean_segment_encoding.inverse_transform([encoder]) metric = R2(mode="macro") diff --git a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py index f15d61aa1..7bf858f59 100644 --- a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py +++ b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py @@ -198,7 +198,6 @@ def test_sanity_selected(model, ts_with_regressors): assert len(useful_regressors) == 3 -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "model", [ diff --git a/tests/test_transforms/test_math/test_lambda_transform.py b/tests/test_transforms/test_math/test_lambda_transform.py index 6302ef47b..61af3e65a 100644 --- a/tests/test_transforms/test_math/test_lambda_transform.py +++ b/tests/test_transforms/test_math/test_lambda_transform.py @@ -8,6 +8,7 @@ 
from etna.transforms import LagTransform from etna.transforms import LambdaTransform from etna.transforms import LogTransform +from copy import deepcopy @pytest.fixture @@ -40,7 +41,6 @@ def ts_range_const(): return ts -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transforms") @pytest.mark.parametrize( "transform_original, transform_function, out_column", [ @@ -62,7 +62,7 @@ def ts_range_const(): ], ) def test_save_transform(ts_non_negative, transform_original, transform_function, out_column): - ts_copy = TSDataset(ts_non_negative.to_pandas(), freq="D") + ts_copy = deepcopy(ts_non_negative) ts_copy.fit_transform([transform_original]) ts = ts_non_negative ts.fit_transform( diff --git a/tests/test_transforms/test_math/test_log_transform.py b/tests/test_transforms/test_math/test_log_transform.py index 38d773b64..188015225 100644 --- a/tests/test_transforms/test_math/test_log_transform.py +++ b/tests/test_transforms/test_math/test_log_transform.py @@ -107,7 +107,6 @@ def test_inverse_transform_out_column(positive_ts_: TSDataset): assert out_column in inversed[segment] -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") def test_fit_transform_with_nans(ts_diff_endings): transform = LogTransform(in_column="target", inplace=True) ts_diff_endings.fit_transform([AddConstTransform(in_column="target", value=100)] + [transform]) diff --git a/tests/test_transforms/test_missing_values/test_impute_transform.py b/tests/test_transforms/test_missing_values/test_impute_transform.py index ed323a884..92f13a210 100644 --- a/tests/test_transforms/test_missing_values/test_impute_transform.py +++ b/tests/test_transforms/test_missing_values/test_impute_transform.py @@ -337,19 +337,18 @@ def test_inverse_transform_many_segments(ts_with_missing_range_x_index_two_segme np.testing.assert_array_equal(df, inverse_transform_result) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("fill_strategy", ["mean", "constant", "running_mean", 
"forward_fill", "seasonal"]) -def test_inverse_transform_in_forecast(df_with_missing_range_x_index_two_segments: pd.DataFrame, fill_strategy: str): +def test_inverse_transform_in_forecast(ts_with_missing_range_x_index_two_segments: pd.DataFrame, fill_strategy: str): """Check that inverse_transform doesn't change anything in forecast.""" - df, rng = df_with_missing_range_x_index_two_segments - ts = TSDataset(df, freq=pd.infer_freq(df.index)) + ts, rng = ts_with_missing_range_x_index_two_segments imputer = TimeSeriesImputerTransform(strategy=fill_strategy) model = NaiveModel() ts.fit_transform(transforms=[imputer]) model.fit(ts) - ts_test = ts.make_future(3) + ts_test = ts.make_future(3, transforms=[imputer]) assert np.all(ts_test[:, :, "target"].isna()) ts_forecast = model.forecast(ts_test) + ts_forecast.inverse_transform([imputer]) for segment in ts.segments: true_value = ts[:, segment, "target"].values[-1] assert np.all(ts_forecast[:, segment, "target"] == true_value) diff --git a/tests/test_transforms/test_missing_values/test_resample_transform.py b/tests/test_transforms/test_missing_values/test_resample_transform.py index fd0028126..fb00276d6 100644 --- a/tests/test_transforms/test_missing_values/test_resample_transform.py +++ b/tests/test_transforms/test_missing_values/test_resample_transform.py @@ -52,7 +52,6 @@ def test_transform(daily_exog_ts, inplace, out_column, expected_resampled_ts, re assert resampled_df.equals(expected_resampled_df) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "inplace,out_column,expected_resampled_ts", ( @@ -69,7 +68,7 @@ def test_transform_future(daily_exog_ts, inplace, out_column, expected_resampled in_column="regressor_exog", inplace=inplace, distribution_column="target", out_column=out_column ) daily_exog_ts.fit_transform([resampler]) - future = daily_exog_ts.make_future(3) + future = daily_exog_ts.make_future(3, transforms=[resampler]) expected_future = expected_resampled_ts.make_future(3) assert 
future.df.equals(expected_future.df) diff --git a/tests/test_transforms/test_outliers/test_outliers_transform.py b/tests/test_transforms/test_outliers/test_outliers_transform.py index a30e8f0ec..0bc1f8a9e 100644 --- a/tests/test_transforms/test_outliers/test_outliers_transform.py +++ b/tests/test_transforms/test_outliers/test_outliers_transform.py @@ -105,7 +105,6 @@ def test_inverse_transform_train(transform_constructor, constructor_kwargs, outl assert np.all(original_df == outliers_solid_tsds.df) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("in_column", ["target", "regressor_1"]) @pytest.mark.parametrize( "transform_constructor, constructor_kwargs", @@ -119,9 +118,9 @@ def test_inverse_transform_future(transform_constructor, constructor_kwargs, out """Checks that inverse transform does not change the future.""" transform = transform_constructor(in_column=in_column, **constructor_kwargs) outliers_solid_tsds.fit_transform([transform]) - future = outliers_solid_tsds.make_future(future_steps=10) - original_future_df = future.df.copy() - future.inverse_transform() + future = outliers_solid_tsds.make_future(future_steps=10, transforms=[transform]) + original_future_df = future.to_pandas() + future.inverse_transform([transform]) # check equals and has nans in the same places assert np.all((future.df == original_future_df) | (future.df.isna() & original_future_df.isna())) diff --git a/tests/test_transforms/test_timestamp/test_fourier_transform.py b/tests/test_transforms/test_timestamp/test_fourier_transform.py index 4932a7070..692f3a200 100644 --- a/tests/test_transforms/test_timestamp/test_fourier_transform.py +++ b/tests/test_transforms/test_timestamp/test_fourier_transform.py @@ -145,17 +145,18 @@ def test_column_values(example_ts, period, mod): assert np.allclose(transform_values, expected_values, atol=1e-12) -@pytest.mark.xfail(reason="TSDataset 2.0") def test_forecast(ts_trend_seasonal): """Test that transform works correctly in forecast.""" 
transform_1 = FourierTransform(period=7, order=3) transform_2 = FourierTransform(period=30.4, order=5) + transforms = [transform_1, transform_2] ts_train, ts_test = ts_trend_seasonal.train_test_split(test_size=10) - ts_train.fit_transform(transforms=[transform_1, transform_2]) + ts_train.fit_transform(transforms=transforms) model = LinearPerSegmentModel() model.fit(ts_train) - ts_future = ts_train.make_future(10) + ts_future = ts_train.make_future(10, transforms=transforms) ts_forecast = model.forecast(ts_future) + ts_forecast.inverse_transform(transforms) metric = R2("macro") r2 = metric(ts_test, ts_forecast) assert r2 > 0.95 From 5a587a4ea6c67da2020c8b0aad952b2f17ff6958 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 15:55:07 +0300 Subject: [PATCH 08/13] Fix tests part 2 --- tests/test_loggers/test_console_logger.py | 16 ++--- .../test_mean_segment_encoder_transform.py | 2 +- .../test_math/test_differencing_transform.py | 68 ++++++++----------- .../test_math/test_lambda_transform.py | 3 +- .../test_math/test_transform_quantiles.py | 4 +- 5 files changed, 39 insertions(+), 54 deletions(-) diff --git a/tests/test_loggers/test_console_logger.py b/tests/test_loggers/test_console_logger.py index 5c3f787d6..272f640dc 100644 --- a/tests/test_loggers/test_console_logger.py +++ b/tests/test_loggers/test_console_logger.py @@ -31,7 +31,6 @@ def check_logged_transforms(log_file: str, transforms: Sequence[Transform]): assert transform.__class__.__name__ in line -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") def test_tsdataset_transform_logging(example_tsds: TSDataset): """Check working of logging inside `TSDataset.transform`.""" transforms = [LagTransform(lags=5, in_column="target"), AddConstTransform(value=5, in_column="target")] @@ -39,12 +38,11 @@ def test_tsdataset_transform_logging(example_tsds: TSDataset): _logger.add(file.name) example_tsds.fit_transform(transforms=transforms) idx = tslogger.add(ConsoleLogger()) - 
example_tsds.transform(transforms=example_tsds.transforms) + example_tsds.transform(transforms=transforms) check_logged_transforms(log_file=file.name, transforms=transforms) tslogger.remove(idx) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") def test_tsdataset_fit_transform_logging(example_tsds: TSDataset): """Check working of logging inside `TSDataset.fit_transform`.""" transforms = [LagTransform(lags=5, in_column="target"), AddConstTransform(value=5, in_column="target")] @@ -56,7 +54,6 @@ def test_tsdataset_fit_transform_logging(example_tsds: TSDataset): tslogger.remove(idx) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") def test_tsdataset_make_future_logging(example_tsds: TSDataset): """Check working of logging inside `TSDataset.make_future`.""" transforms = [LagTransform(lags=5, in_column="target"), AddConstTransform(value=5, in_column="target")] @@ -64,12 +61,11 @@ def test_tsdataset_make_future_logging(example_tsds: TSDataset): _logger.add(file.name) example_tsds.fit_transform(transforms=transforms) idx = tslogger.add(ConsoleLogger()) - _ = example_tsds.make_future(5) + _ = example_tsds.make_future(5, transforms=transforms) check_logged_transforms(log_file=file.name, transforms=transforms) tslogger.remove(idx) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") def test_tsdataset_inverse_transform_logging(example_tsds: TSDataset): """Check working of logging inside `TSDataset.inverse_transform`.""" transforms = [LagTransform(lags=5, in_column="target"), AddConstTransform(value=5, in_column="target")] @@ -77,12 +73,11 @@ def test_tsdataset_inverse_transform_logging(example_tsds: TSDataset): _logger.add(file.name) example_tsds.fit_transform(transforms=transforms) idx = tslogger.add(ConsoleLogger()) - example_tsds.inverse_transform() + example_tsds.inverse_transform(transforms=transforms) check_logged_transforms(log_file=file.name, transforms=transforms[::-1]) tslogger.remove(idx) 
-@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("metric", [MAE(), MSE(), MAE(mode="macro")]) def test_metric_logging(example_tsds: TSDataset, metric: Metric): """Check working of logging inside `Metric.__call__`.""" @@ -104,7 +99,6 @@ def test_metric_logging(example_tsds: TSDataset, metric: Metric): tslogger.remove(idx) -@pytest.mark.skip(reason="TSDataset 2.0") def test_backtest_logging(example_tsds: TSDataset): """Check working of logging inside backtest.""" file = NamedTemporaryFile() @@ -125,7 +119,6 @@ def test_backtest_logging(example_tsds: TSDataset): tslogger.remove(idx) -@pytest.mark.skip(reason="TSDataset 2.0") def test_backtest_logging_no_tables(example_tsds: TSDataset): """Check working of logging inside backtest with `table=False`.""" file = NamedTemporaryFile() @@ -144,7 +137,6 @@ def test_backtest_logging_no_tables(example_tsds: TSDataset): tslogger.remove(idx) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transform") @pytest.mark.parametrize("model", [LinearPerSegmentModel(), LinearMultiSegmentModel()]) def test_model_logging(example_tsds, model): """Check working of logging in fit/forecast of model.""" @@ -157,7 +149,7 @@ def test_model_logging(example_tsds, model): idx = tslogger.add(ConsoleLogger()) model.fit(example_tsds) - to_forecast = example_tsds.make_future(horizon) + to_forecast = example_tsds.make_future(horizon, transforms=[lags]) model.forecast(to_forecast) with open(file.name, "r") as in_file: diff --git a/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py b/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py index a74823ef9..06187648d 100644 --- a/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py +++ b/tests/test_transforms/test_encoders/test_mean_segment_encoder_transform.py @@ -45,7 +45,7 @@ def test_mean_segment_encoder_forecast(almost_constant_ts): train, test = almost_constant_ts.train_test_split(test_size=horizon) 
train.fit_transform([encoder]) model.fit(train) - future = train.make_future(horizon,transforms=[encoder]) + future = train.make_future(horizon, transforms=[encoder]) pred_mean_segment_encoding = model.forecast(future) pred_mean_segment_encoding.inverse_transform([encoder]) diff --git a/tests/test_transforms/test_math/test_differencing_transform.py b/tests/test_transforms/test_math/test_differencing_transform.py index 9ec8cb0d6..cf8b4cd87 100644 --- a/tests/test_transforms/test_math/test_differencing_transform.py +++ b/tests/test_transforms/test_math/test_differencing_transform.py @@ -124,15 +124,14 @@ def check_inverse_transform_inplace_train(transform: GeneralDifferencingTransfor def check_inverse_transform_inplace_test( - transform: GeneralDifferencingTransform, period: int, order: int, df: pd.DataFrame + transform: GeneralDifferencingTransform, period: int, order: int, ts: TSDataset ): """Check that differencing transform correctly makes inverse_transform on test data in inplace mode.""" - ts = TSDataset(df, freq="D") ts_train, ts_test = ts.train_test_split(test_size=20) ts_train.fit_transform(transforms=[transform]) # make predictions by hand taking into account the nature of df_nans - future_ts = ts_train.make_future(20) + future_ts = ts_train.make_future(20, transforms=[transform]) if order == 1: future_ts.df.loc[:, pd.IndexSlice["1", "target"]] = 1 * period future_ts.df.loc[:, pd.IndexSlice["2", "target"]] = 2 * period @@ -143,21 +142,21 @@ def check_inverse_transform_inplace_test( raise ValueError("Wrong order") # check values from inverse_transform - future_ts.inverse_transform() + future_ts.inverse_transform([transform]) assert np.all(future_ts.to_pandas() == ts_test.to_pandas()) -def check_inverse_transform_inplace_test_quantiles(transform: GeneralDifferencingTransform, df: pd.DataFrame): +def check_inverse_transform_inplace_test_quantiles(transform: GeneralDifferencingTransform, ts: TSDataset): """Check that differencing transform correctly makes 
inverse_transform on test data with quantiles.""" - ts = TSDataset(df, freq="D") ts_train, ts_test = ts.train_test_split(test_size=20) ts_train.fit_transform(transforms=[transform]) model = ProphetModel() model.fit(ts_train) # make predictions by Prophet with prediction interval - future_ts = ts_train.make_future(20) + future_ts = ts_train.make_future(20, transforms=[transform]) predict_ts = model.forecast(future_ts, prediction_interval=True, quantiles=[0.025, 0.975]) + predict_ts.inverse_transform([transform]) # check that predicted value is within the interval for segment in predict_ts.segments: @@ -165,10 +164,9 @@ def check_inverse_transform_inplace_test_quantiles(transform: GeneralDifferencin assert np.all(predict_ts[:, segment, "target"] <= predict_ts[:, segment, "target_0.975"]) -def check_backtest_sanity(transform: GeneralDifferencingTransform, df: pd.DataFrame): +def check_backtest_sanity(transform: GeneralDifferencingTransform, ts: TSDataset): """Check that differencing transform correctly works in backtest.""" # create pipeline with linear model - ts = TSDataset(df, freq="D") model = LinearPerSegmentModel() pipeline = Pipeline( model=model, transforms=[LagTransform(in_column="target", lags=[7, 8, 9]), transform], horizon=7 @@ -352,7 +350,6 @@ def test_general_inverse_transform_fail_not_all_test(transform, ts_nans): transform.inverse_transform(ts_nans) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "transform", [ @@ -360,16 +357,16 @@ def test_general_inverse_transform_fail_not_all_test(transform, ts_nans): DifferencingTransform(in_column="target", period=1, order=1, inplace=True), ], ) -def test_general_inverse_transform_fail_test_not_right_after_train(transform, df_nans): +def test_general_inverse_transform_fail_test_not_right_after_train(transform, ts_nans): """Test that differencing transform fails to make inverse_transform on not adjacent test data.""" - ts = TSDataset(df_nans, freq="D") + ts = ts_nans ts_train, ts_test = 
ts.train_test_split(test_size=10) ts_train.fit_transform(transforms=[transform]) - future_ts = ts_train.make_future(10) - future_df = future_ts.to_pandas() + future_ts = ts_train.make_future(10, transforms=[transform]) + future_ts_cropped = TSDataset(future_ts.to_pandas().iloc[1:], freq=future_ts.freq) with pytest.raises(ValueError, match="Test should go after the train without gaps"): - _ = transform.inverse_transform(future_df.iloc[1:]) + _ = transform.inverse_transform(future_ts_cropped) @pytest.mark.parametrize("period", [1, 7]) @@ -402,7 +399,6 @@ def test_full_inverse_transform_inplace_train(period, order, ts_nans): check_inverse_transform_inplace_train(transform, ts_nans) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize( "transform", [ @@ -410,69 +406,65 @@ def test_full_inverse_transform_inplace_train(period, order, ts_nans): DifferencingTransform(in_column="target", period=1, order=1, inplace=True), ], ) -def test_general_inverse_transform_inplace_test_fail_nans(transform, df_nans): +def test_general_inverse_transform_inplace_test_fail_nans(transform, ts_nans): """Test that differencing transform fails to make inverse_transform on test data if there are NaNs.""" - ts = TSDataset(df_nans, freq="D") + ts = ts_nans ts_train, ts_test = ts.train_test_split(test_size=20) ts_train.fit_transform(transforms=[transform]) # make predictions by hand only on one segment - future_ts = ts_train.make_future(20) + future_ts = ts_train.make_future(20, transforms=[transform]) future_ts.df.loc[:, pd.IndexSlice["1", "target"]] = np.NaN future_ts.df.loc[:, pd.IndexSlice["2", "target"]] = 2 # check fail on inverse_transform with pytest.raises(ValueError, match="There should be no NaNs inside the segments"): - future_ts.inverse_transform() + future_ts.inverse_transform([transform]) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("period", [1, 7]) -def test_single_inverse_transform_inplace_test(period, df_nans): +def 
test_single_inverse_transform_inplace_test(period, ts_nans): """Test that _SingleDifferencingTransform correctly makes inverse_transform on test data in inplace mode.""" transform = _SingleDifferencingTransform(in_column="target", period=period, inplace=True) - check_inverse_transform_inplace_test(transform, period, 1, df_nans) + check_inverse_transform_inplace_test(transform, period, 1, ts_nans) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("period", [1, 7]) @pytest.mark.parametrize("order", [1, 2]) -def test_full_inverse_transform_inplace_test(period, order, df_nans): +def test_full_inverse_transform_inplace_test(period, order, ts_nans): """Test that DifferencingTransform correctly makes inverse_transform on test data in inplace mode.""" transform = DifferencingTransform(in_column="target", period=period, order=order, inplace=True) - check_inverse_transform_inplace_test(transform, period, order, df_nans) + check_inverse_transform_inplace_test(transform, period, order, ts_nans) -@pytest.mark.xfail(reason="TSDataset 2.0") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") @pytest.mark.parametrize("period", [1, 7]) -def test_single_inverse_transform_inplace_test_quantiles(period, df_nans_with_noise): +def test_single_inverse_transform_inplace_test_quantiles(period, ts_nans_with_noise): """Test that _SingleDifferencingTransform correctly makes inverse_transform on test data with quantiles.""" transform = _SingleDifferencingTransform(in_column="target", period=period, inplace=True) - check_inverse_transform_inplace_test_quantiles(transform, df_nans_with_noise) + check_inverse_transform_inplace_test_quantiles(transform, ts_nans_with_noise) -@pytest.mark.xfail(reason="TSDataset 2.0") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") @pytest.mark.parametrize("period", [1, 7]) @pytest.mark.parametrize("order", [1, 2]) -def test_full_inverse_transform_inplace_test_quantiles(period, 
order, df_nans_with_noise): +def test_full_inverse_transform_inplace_test_quantiles(period, order, ts_nans_with_noise): """Test that DifferencingTransform correctly makes inverse_transform on test data with quantiles.""" transform = DifferencingTransform(in_column="target", period=period, order=2, inplace=True) - check_inverse_transform_inplace_test_quantiles(transform, df_nans_with_noise) + check_inverse_transform_inplace_test_quantiles(transform, ts_nans_with_noise) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("period", [1, 7]) -def test_single_backtest_sanity(period, df_nans_with_noise): +def test_single_backtest_sanity(period, ts_nans_with_noise): """Test that _SingleDifferencingTransform correctly works in backtest.""" transform = _SingleDifferencingTransform(in_column="target", period=period, inplace=True) - check_backtest_sanity(transform, df_nans_with_noise) + check_backtest_sanity(transform, ts_nans_with_noise) -@pytest.mark.xfail(reason="TSDataset 2.0") @pytest.mark.parametrize("period", [1, 7]) @pytest.mark.parametrize("order", [1, 2]) -def test_full_backtest_sanity(period, order, df_nans_with_noise): +def test_full_backtest_sanity(period, order, ts_nans_with_noise): """Test that DifferencingTransform correctly works in backtest.""" transform = DifferencingTransform(in_column="target", period=period, order=order, inplace=True) - check_backtest_sanity(transform, df_nans_with_noise) + check_backtest_sanity(transform, ts_nans_with_noise) diff --git a/tests/test_transforms/test_math/test_lambda_transform.py b/tests/test_transforms/test_math/test_lambda_transform.py index 61af3e65a..ef03f47f1 100644 --- a/tests/test_transforms/test_math/test_lambda_transform.py +++ b/tests/test_transforms/test_math/test_lambda_transform.py @@ -1,3 +1,5 @@ +from copy import deepcopy + import numpy as np import pandas as pd import pytest @@ -8,7 +10,6 @@ from etna.transforms import LagTransform from etna.transforms import LambdaTransform from 
etna.transforms import LogTransform -from copy import deepcopy @pytest.fixture diff --git a/tests/test_transforms/test_math/test_transform_quantiles.py b/tests/test_transforms/test_math/test_transform_quantiles.py index 0fded644e..08d714d3e 100644 --- a/tests/test_transforms/test_math/test_transform_quantiles.py +++ b/tests/test_transforms/test_math/test_transform_quantiles.py @@ -42,7 +42,7 @@ def test_standard_scaler_dummy_mean_shift_for_quantiles_macro(toy_dataset_with_m ) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transforms") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") def test_add_constant_dummy(toy_dataset_equal_targets_and_quantiles): """ This test checks that inverse_transform transforms forecast's quantiles the same way with target itself and @@ -66,7 +66,7 @@ def test_add_constant_dummy(toy_dataset_equal_targets_and_quantiles): np.testing.assert_allclose(toy_dataset.iloc[:, 2], toy_dataset.iloc[:, 3]) -@pytest.mark.xfail(reason="TSDataset 2.0: blocked by another transforms") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") @pytest.mark.parametrize( "transform", ( From a41f89f354eb432acf322760af88ffaa0667f953 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 16:46:37 +0300 Subject: [PATCH 09/13] Fix tests part 3 --- tests/conftest.py | 9 ----- tests/test_models/nn/test_rnn.py | 3 +- tests/test_models/test_inference.py | 59 +++++++++++++++++++---------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 98a1856c1..aedd7820d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,17 +10,8 @@ # TODO: Collection of tests to fix in TSDataset 2.0 collect_ignore = [ - "test_models/test_sklearn.py", - "test_loggers/test_file_logger.py", - "test_loggers/test_wandb_logger.py", - "test_loggers/test_console_logger.py", - "test_models/nn/test_rnn.py", - "test_commands/test_backtest.py", - 
"test_commands/test_forecast.py", "test_models/nn/test_tft.py", "test_models/nn/test_deepar.py", - "test_models/test_inference.py", - "test_ensembles/test_stacking_ensemble.py", ] diff --git a/tests/test_models/nn/test_rnn.py b/tests/test_models/nn/test_rnn.py index d8db7a400..5a599faa1 100644 --- a/tests/test_models/nn/test_rnn.py +++ b/tests/test_models/nn/test_rnn.py @@ -27,9 +27,10 @@ def test_rnn_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_wit model = RNNModel( input_size=1, encoder_length=encoder_length, decoder_length=decoder_length, trainer_params=dict(max_epochs=100) ) - future = ts_train.make_future(decoder_length, encoder_length) + future = ts_train.make_future(decoder_length, transforms=[std], tail_steps=encoder_length) model.fit(ts_train) future = model.forecast(future, horizon=horizon) + future.inverse_transform([std]) mae = MAE("macro") assert mae(ts_test, future) < 0.06 diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py index 7f43a484d..4d238935e 100644 --- a/tests/test_models/test_inference.py +++ b/tests/test_models/test_inference.py @@ -37,9 +37,10 @@ def _test_forecast_in_sample_full(ts, model, transforms): # forecasting forecast_ts = TSDataset(df, freq="D") - forecast_ts.transform(ts.transforms) + forecast_ts.transform(transforms) forecast_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN model.forecast(forecast_ts) + forecast_ts.inverse_transform(transforms) # checking forecast_df = forecast_ts.to_pandas(flatten=True) @@ -55,10 +56,11 @@ def _test_forecast_in_sample_suffix(ts, model, transforms): # forecasting forecast_ts = TSDataset(df, freq="D") - forecast_ts.transform(ts.transforms) + forecast_ts.transform(transforms) forecast_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN forecast_ts.df = forecast_ts.df.iloc[6:] model.forecast(forecast_ts) + forecast_ts.inverse_transform(transforms) # checking forecast_df = forecast_ts.to_pandas(flatten=True) @@ -70,20 +72,22 @@ def 
_test_forecast_out_sample_prefix(ts, model, transforms): ts.fit_transform(transforms) model.fit(ts) # forecasting full - forecast_full_ts = ts.make_future(5) + forecast_full_ts = ts.make_future(5, transforms=transforms) import torch # TODO: remove after fix at issue-802 torch.manual_seed(11) model.forecast(forecast_full_ts) + forecast_full_ts.inverse_transform(transforms) # forecasting only prefix - forecast_prefix_ts = ts.make_future(5) + forecast_prefix_ts = ts.make_future(5, transforms=transforms) forecast_prefix_ts.df = forecast_prefix_ts.df.iloc[:-2] torch.manual_seed(11) # TODO: remove after fix at issue-802 model.forecast(forecast_prefix_ts) + forecast_prefix_ts.inverse_transform(transforms) # checking forecast_full_df = forecast_full_ts.to_pandas() @@ -97,13 +101,15 @@ def _test_forecast_out_sample_suffix(ts, model, transforms): model.fit(ts) # forecasting full - forecast_full_ts = ts.make_future(5) + forecast_full_ts = ts.make_future(5, transforms=transforms) model.forecast(forecast_full_ts) + forecast_full_ts.inverse_transform(transforms) # forecasting only suffix - forecast_gap_ts = ts.make_future(5) + forecast_gap_ts = ts.make_future(5, transforms=transforms) forecast_gap_ts.df = forecast_gap_ts.df.iloc[2:] model.forecast(forecast_gap_ts) + forecast_gap_ts.inverse_transform(transforms) # checking forecast_full_df = forecast_full_ts.to_pandas() @@ -118,25 +124,28 @@ def _test_forecast_mixed_in_out_sample(ts, model, transforms): model.fit(ts) # forecasting mixed in-sample and out-sample - future_ts = ts.make_future(5) + future_ts = ts.make_future(5, transforms=transforms) future_df = future_ts.to_pandas().loc[:, pd.IndexSlice[:, "target"]] df_full = pd.concat((df, future_df)) forecast_full_ts = TSDataset(df=df_full, freq=future_ts.freq) - forecast_full_ts.transform(ts.transforms) + forecast_full_ts.transform(transforms) forecast_full_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN forecast_full_ts.df = forecast_full_ts.df.iloc[6:] 
model.forecast(forecast_full_ts) + forecast_full_ts.inverse_transform(transforms) # forecasting only in sample forecast_in_sample_ts = TSDataset(df, freq="D") - forecast_in_sample_ts.transform(ts.transforms) + forecast_in_sample_ts.transform(transforms) forecast_in_sample_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN forecast_in_sample_ts.df = forecast_in_sample_ts.df.iloc[6:] model.forecast(forecast_in_sample_ts) + forecast_in_sample_ts.inverse_transform(transforms) # forecasting only out sample - forecast_out_sample_ts = ts.make_future(5) + forecast_out_sample_ts = ts.make_future(5, transforms=transforms) model.forecast(forecast_out_sample_ts) + forecast_out_sample_ts.inverse_transform(transforms) # checking forecast_full_df = forecast_full_ts.to_pandas() @@ -185,7 +194,7 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds): [ (BATSModel(use_trend=True), []), (TBATSModel(use_trend=True), []), - ( + pytest.param( DeepARModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -196,8 +205,9 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds): target_normalizer=GroupNormalizer(groups=["segment"]), ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), - ( + pytest.param( TFTModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -210,6 +220,7 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds): target_normalizer=None, ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), ], ) @@ -247,7 +258,7 @@ def test_forecast_in_sample_suffix(model, transforms, example_tsds): [ (BATSModel(use_trend=True), []), (TBATSModel(use_trend=True), []), - ( + pytest.param( DeepARModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -258,8 +269,9 @@ def test_forecast_in_sample_suffix(model, transforms, example_tsds): target_normalizer=GroupNormalizer(groups=["segment"]), ) ], + 
marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), - ( + pytest.param( TFTModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -272,6 +284,7 @@ def test_forecast_in_sample_suffix(model, transforms, example_tsds): target_normalizer=None, ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), ], ) @@ -300,7 +313,7 @@ def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_ts (NaiveModel(lag=3), []), (BATSModel(use_trend=True), []), (TBATSModel(use_trend=True), []), - ( + pytest.param( DeepARModel(max_epochs=5, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -311,8 +324,9 @@ def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_ts target_normalizer=GroupNormalizer(groups=["segment"]), ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), - ( + pytest.param( TFTModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -325,6 +339,7 @@ def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_ts target_normalizer=None, ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), ], ) @@ -358,7 +373,7 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds): @pytest.mark.parametrize( "model, transforms", [ - ( + pytest.param( TFTModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -371,8 +386,9 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds): target_normalizer=None, ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), - ( + pytest.param( DeepARModel(max_epochs=5, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -383,6 +399,7 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds): target_normalizer=GroupNormalizer(groups=["segment"]), ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), ], ) @@ -430,7 +447,7 @@ def 
test_forecast_mixed_in_out_sample(model, transforms, example_tsds): [ (BATSModel(use_trend=True), []), (TBATSModel(use_trend=True), []), - ( + pytest.param( DeepARModel(max_epochs=5, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -441,8 +458,9 @@ def test_forecast_mixed_in_out_sample(model, transforms, example_tsds): target_normalizer=GroupNormalizer(groups=["segment"]), ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), - ( + pytest.param( TFTModel(max_epochs=1, learning_rate=[0.01]), [ PytorchForecastingTransform( @@ -455,6 +473,7 @@ def test_forecast_mixed_in_out_sample(model, transforms, example_tsds): target_normalizer=None, ) ], + marks=pytest.mark.xfail(reason="TSDataset 2: PytorchForecasting nets"), ), ], ) From 350ef828f1f14f6cb610c2209dcdd9b2e3d88a63 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 16:46:59 +0300 Subject: [PATCH 10/13] Add TODO to the nets --- etna/models/nn/deepar.py | 5 +++-- etna/models/nn/tft.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py index 200a1dd78..622c9645b 100644 --- a/etna/models/nn/deepar.py +++ b/etna/models/nn/deepar.py @@ -126,8 +126,9 @@ def _from_dataset(self, ts_dataset: TimeSeriesDataSet) -> LightningModule: @staticmethod def _get_pf_transform(ts: TSDataset) -> PytorchForecastingTransform: """Get PytorchForecastingTransform from ts.transforms or raise exception if not found.""" - if ts.transforms is not None and isinstance(ts.transforms[-1], PytorchForecastingTransform): - return ts.transforms[-1] + # TODO: TSDataset does not have "transform" attribute anymore + if ts.transforms is not None and isinstance(ts.transforms[-1], PytorchForecastingTransform): # type: ignore + return ts.transforms[-1] # type: ignore else: raise ValueError( "Not valid usage of transforms, please add PytorchForecastingTransform at the end of transforms" diff --git a/etna/models/nn/tft.py 
b/etna/models/nn/tft.py index b072581d0..f0522a554 100644 --- a/etna/models/nn/tft.py +++ b/etna/models/nn/tft.py @@ -133,8 +133,9 @@ def _from_dataset(self, ts_dataset: TimeSeriesDataSet) -> LightningModule: @staticmethod def _get_pf_transform(ts: TSDataset) -> PytorchForecastingTransform: """Get PytorchForecastingTransform from ts.transforms or raise exception if not found.""" - if ts.transforms is not None and isinstance(ts.transforms[-1], PytorchForecastingTransform): - return ts.transforms[-1] + # TODO: TSDataset does not have "transforms" attribute anymore + if ts.transforms is not None and isinstance(ts.transforms[-1], PytorchForecastingTransform): # type: ignore + return ts.transforms[-1] # type: ignore else: raise ValueError( "Not valid usage of transforms, please add PytorchForecastingTransform at the end of transforms" From f541743ebac6d03d845beb61f310c389949f5e95 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 16:47:14 +0300 Subject: [PATCH 11/13] Add comment to dataset --- etna/datasets/tsdataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 4bfb6c162..173a06a66 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -178,7 +178,9 @@ def __getitem__(self, item): df = df.loc[first_valid_idx:] return df - def make_future(self, future_steps: int, transforms: Sequence["Transform"] = (), tail_steps: int = 0) -> "TSDataset": + def make_future( + self, future_steps: int, transforms: Sequence["Transform"] = (), tail_steps: int = 0 + ) -> "TSDataset": """Return new TSDataset with future steps. 
Parameters @@ -244,6 +246,7 @@ def make_future(self, future_steps: int, transforms: Sequence["Transform"] = (), f"NaN-s will be used for missing values" ) + # Here only df if required, other metadata is not necessary to build the dataset ts = TSDataset(df=df, freq=self.freq) for transform in transforms: tslogger.log(f"Transform {repr(transform)} is applied to dataset") From eed9fda1c226ff20b578cf3980456f90aa806204 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 16:53:29 +0300 Subject: [PATCH 12/13] Fix typo --- .../test_decomposition/test_transform_quantiles.py | 2 +- .../test_transforms/test_math/test_differencing_transform.py | 4 ++-- tests/test_transforms/test_math/test_transform_quantiles.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_transforms/test_decomposition/test_transform_quantiles.py b/tests/test_transforms/test_decomposition/test_transform_quantiles.py index 176ab46da..b9d396fc4 100644 --- a/tests/test_transforms/test_decomposition/test_transform_quantiles.py +++ b/tests/test_transforms/test_decomposition/test_transform_quantiles.py @@ -7,7 +7,7 @@ from etna.transforms import TheilSenTrendTransform -@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features with quantiles") @pytest.mark.parametrize( "transform", ( diff --git a/tests/test_transforms/test_math/test_differencing_transform.py b/tests/test_transforms/test_math/test_differencing_transform.py index cf8b4cd87..ba14dc3f1 100644 --- a/tests/test_transforms/test_math/test_differencing_transform.py +++ b/tests/test_transforms/test_math/test_differencing_transform.py @@ -438,7 +438,7 @@ def test_full_inverse_transform_inplace_test(period, order, ts_nans): check_inverse_transform_inplace_test(transform, period, order, ts_nans) -@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") +@pytest.mark.xfail(reason="TSDataset 
2.0: bug in required_features with quantiles") @pytest.mark.parametrize("period", [1, 7]) def test_single_inverse_transform_inplace_test_quantiles(period, ts_nans_with_noise): """Test that _SingleDifferencingTransform correctly makes inverse_transform on test data with quantiles.""" @@ -446,7 +446,7 @@ def test_single_inverse_transform_inplace_test_quantiles(period, ts_nans_with_no check_inverse_transform_inplace_test_quantiles(transform, ts_nans_with_noise) -@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features with quantiles") @pytest.mark.parametrize("period", [1, 7]) @pytest.mark.parametrize("order", [1, 2]) def test_full_inverse_transform_inplace_test_quantiles(period, order, ts_nans_with_noise): diff --git a/tests/test_transforms/test_math/test_transform_quantiles.py b/tests/test_transforms/test_math/test_transform_quantiles.py index 08d714d3e..6eb366f61 100644 --- a/tests/test_transforms/test_math/test_transform_quantiles.py +++ b/tests/test_transforms/test_math/test_transform_quantiles.py @@ -42,7 +42,7 @@ def test_standard_scaler_dummy_mean_shift_for_quantiles_macro(toy_dataset_with_m ) -@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features with quantiles") def test_add_constant_dummy(toy_dataset_equal_targets_and_quantiles): """ This test checks that inverse_transform transforms forecast's quantiles the same way with target itself and @@ -66,7 +66,7 @@ def test_add_constant_dummy(toy_dataset_equal_targets_and_quantiles): np.testing.assert_allclose(toy_dataset.iloc[:, 2], toy_dataset.iloc[:, 3]) -@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features wit quantiles") +@pytest.mark.xfail(reason="TSDataset 2.0: bug in required_features with quantiles") @pytest.mark.parametrize( "transform", ( From 50d04dace9974887675c34e0431cec14b80818bc Mon Sep 17 00:00:00 
2001 From: alex-hse-repository Date: Mon, 26 Sep 2022 18:13:37 +0300 Subject: [PATCH 13/13] Fix typo --- etna/datasets/tsdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 173a06a66..9be42a2c5 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -246,7 +246,7 @@ def make_future( f"NaN-s will be used for missing values" ) - # Here only df if required, other metadata is not necessary to build the dataset + # Here only df is required, other metadata is not necessary to build the dataset ts = TSDataset(df=df, freq=self.freq) for transform in transforms: tslogger.log(f"Transform {repr(transform)} is applied to dataset")