Skip to content

Implement SumTransform #1021

Merged
merged 6 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
-
- Add python 3.10 support ([#1005](https://github.com/tinkoff-ai/etna/pull/1005))
-
- Add `SumTransform` ([#1021](https://github.com/tinkoff-ai/etna/pull/1021))
- Add `plot_change_points_interactive` ([#988](https://github.com/tinkoff-ai/etna/pull/988))
- Add `experimental` module with `TimeSeriesBinaryClassifier` and `PredictabilityAnalyzer` ([#985](https://github.com/tinkoff-ai/etna/pull/985))
- Inference track results: add `predict` method to pipelines, teach some models to work with context, change hierarchy of base models, update notebook examples ([#979](https://github.com/tinkoff-ai/etna/pull/979))
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from etna.transforms.math import RobustScalerTransform
from etna.transforms.math import StandardScalerTransform
from etna.transforms.math import StdTransform
from etna.transforms.math import SumTransform
from etna.transforms.math import YeoJohnsonTransform
from etna.transforms.missing_values import ResampleWithDistributionTransform
from etna.transforms.missing_values import TimeSeriesImputerTransform
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/math/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@
from etna.transforms.math.statistics import MinTransform
from etna.transforms.math.statistics import QuantileTransform
from etna.transforms.math.statistics import StdTransform
from etna.transforms.math.statistics import SumTransform
from etna.transforms.math.statistics import WindowStatisticsTransform
53 changes: 53 additions & 0 deletions etna/transforms/math/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,58 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
return result


class SumTransform(WindowStatisticsTransform):
    """Window statistic that adds up the values of ``in_column`` inside each window."""

    def __init__(
        self,
        in_column: str,
        window: int,
        seasonality: int = 1,
        min_periods: int = 1,
        fillna: float = 0,
        out_column: Optional[str] = None,
    ):
        """Create a SumTransform.

        Parameters
        ----------
        in_column:
            name of the column to aggregate
        window:
            number of points in the aggregation window; ``-1`` means the sum is
            computed over the whole series seen so far
        seasonality:
            seasonality of lags used to build the window
        min_periods:
            minimal number of targets required in a window; windows holding
            fewer than ``min_periods`` targets produce no value
        fillna:
            value substituted for NaNs in the result
        out_column:
            name of the resulting column. When omitted, ``self.__repr__()`` is used
        """
        # Keep these assignments ahead of ``super().__init__``: the fallback column
        # name below comes from ``self.__repr__()``, which is evaluated before the
        # parent initializer runs (presumably it reads these attributes -- verify
        # against the base class).
        self.in_column = in_column
        self.window = window
        self.seasonality = seasonality
        self.min_periods = min_periods
        self.fillna = fillna
        self.out_column = out_column

        result_name = self.out_column
        if result_name is None:
            result_name = self.__repr__()

        super().__init__(
            in_column=in_column,
            out_column=result_name,
            window=window,
            seasonality=seasonality,
            min_periods=min_periods,
            fillna=fillna,
        )

    def _aggregate(self, series: np.ndarray) -> np.ndarray:
        """Return the NaN-aware sum (``bn.nansum``) of ``series`` taken along axis 2."""
        return bn.nansum(series, axis=2)


__all__ = [
"MedianTransform",
"MaxTransform",
Expand All @@ -567,4 +619,5 @@ def _aggregate(self, series: np.ndarray) -> np.ndarray:
"WindowStatisticsTransform",
"MADTransform",
"MinMaxDifferenceTransform",
"SumTransform",
]
55 changes: 55 additions & 0 deletions tests/test_transforms/test_math/test_statistics_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from etna.transforms.math import MinTransform
from etna.transforms.math import QuantileTransform
from etna.transforms.math import StdTransform
from etna.transforms.math import SumTransform


@pytest.fixture
Expand Down Expand Up @@ -62,6 +63,8 @@ def df_for_agg_with_nan() -> pd.DataFrame:
(MADTransform, "test_mad"),
(MinMaxDifferenceTransform, None),
(MinMaxDifferenceTransform, "test_min_max_diff"),
(SumTransform, None),
(SumTransform, "test_sum"),
),
)
def test_interface_simple(simple_df_for_agg: pd.DataFrame, class_name: Any, out_column: str):
Expand Down Expand Up @@ -279,6 +282,57 @@ def test_min_max_diff_feature(
assert (res["expected"] == res["segment_1"]["result"]).all()


@pytest.mark.parametrize(
    "window,periods,fill_na,expected",
    [
        (10, 1, 0, np.array([-1, 0, 3, 3, 7, 16, 24, 29, 35, 35])),
    ],
)
def test_sum_feature_with_nan(
    df_for_agg_with_nan: pd.DataFrame,
    window: int,
    periods: int,
    fill_na: float,
    expected: np.ndarray,
):
    """Check the ``SumTransform`` output for ``segment_1`` on the NaN-containing fixture."""
    sum_transform = SumTransform(
        in_column="target",
        out_column="result",
        window=window,
        min_periods=periods,
        fillna=fill_na,
    )
    transformed = sum_transform.fit_transform(df_for_agg_with_nan)
    actual = transformed["segment_1"]["result"]
    np.testing.assert_array_almost_equal(expected, actual)


@pytest.mark.parametrize(
    "window,periods,fill_na,expected",
    (
        # Window covering the whole series and the special ``-1`` window are
        # expected to give the same running total.
        (10, 1, 0, np.array([0, 1, 3, 6, 10, 15, 21, 28, 36, 45])),
        (-1, 1, 0, np.array([0, 1, 3, 6, 10, 15, 21, 28, 36, 45])),
        # Short windows; ``fill_na`` must not leak into windows that have enough points.
        (3, 1, -17, np.array([0, 1, 3, 6, 9, 12, 15, 18, 21, 24])),
        (1, 1, -17, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
        # ``min_periods == window``: the first incomplete windows take the ``fill_na`` value.
        (3, 3, -17, np.array([-17, -17, 3, 6, 9, 12, 15, 18, 21, 24])),
    ),
)
def test_sum_feature(
    simple_df_for_agg: pd.DataFrame,
    window: int,
    periods: int,
    fill_na: float,
    expected: np.ndarray,
):
    """Check ``SumTransform`` values on ``segment_1`` for several window settings.

    Parameters
    ----------
    simple_df_for_agg:
        fixture with the input data (defined outside this excerpt)
    window:
        window size passed to the transform
    periods:
        value passed as ``min_periods``
    fill_na:
        value passed as ``fillna``
    expected:
        expected content of the ``result`` column for ``segment_1``
    """
    transform = SumTransform(
        window=window,
        min_periods=periods,
        fillna=fill_na,
        in_column="target",
        out_column="result",
    )

    res = transform.fit_transform(simple_df_for_agg)
    np.testing.assert_array_almost_equal(expected, res["segment_1"]["result"])


@pytest.mark.parametrize(
"transform",
(
Expand All @@ -289,6 +343,7 @@ def test_min_max_diff_feature(
StdTransform(in_column="target", window=5),
MADTransform(in_column="target", window=5),
MinMaxDifferenceTransform(in_column="target", window=5),
SumTransform(in_column="target", window=5),
),
)
def test_fit_transform_with_nans(transform, ts_diff_endings):
Expand Down