Skip to content

Hotfix/cli bug #758

Merged
merged 7 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
### Fixed
-
- Add `known_future` parameter to CLI ([#758](https://github.com/tinkoff-ai/etna/pull/758))
-
-
-
Expand Down
14 changes: 13 additions & 1 deletion etna/commands/backtest_command.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Sequence
from typing import Union

import hydra_slayer
import pandas as pd
Expand All @@ -19,6 +23,12 @@ def backtest(
freq: str = typer.Argument(..., help="frequency of timestamp in files in pandas format"),
output_path: Path = typer.Argument(..., help="where to save forecast"),
exog_path: Optional[Path] = typer.Argument(default=None, help="path to csv with exog data"),
known_future: Optional[List[str]] = typer.Argument(
None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not default="all"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because "all" is not List. And Typer does not support Union. So I can only configure it like this "all" if not known_future else known_future

Copy link
Contributor

@martins0n martins0n Jun 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not?
It's normal workaround I guess

help="list of all known_future columns (regressor "
"columns). If not specified then all exog_columns "
"considered known_future.",
),
):
"""Command to run backtest with etna without coding.

Expand Down Expand Up @@ -60,11 +70,13 @@ def backtest(
df_timeseries = TSDataset.to_dataset(df_timeseries)

df_exog = None
k_f: Union[Literal["all"], Sequence[Any]] = ()
if exog_path:
df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
df_exog = TSDataset.to_dataset(df_exog)
k_f = "all" if not known_future else known_future

tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)
tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog, known_future=k_f)

pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
backtest_configs_hydra_slayer: Dict[str, Any] = hydra_slayer.get_from_params(**backtest_configs)
Expand Down
14 changes: 13 additions & 1 deletion etna/commands/forecast_command.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Sequence
from typing import Union

import hydra_slayer
import pandas as pd
Expand All @@ -20,6 +24,12 @@ def forecast(
exog_path: Optional[Path] = typer.Argument(None, help="path to csv with exog data"),
forecast_config_path: Optional[Path] = typer.Argument(None, help="path to yaml config with forecast params"),
raw_output: bool = typer.Argument(False, help="by default we return only forecast without features"),
known_future: Optional[List[str]] = typer.Argument(
None,
help="list of all known_future columns (regressor "
"columns). If not specified then all exog_columns "
"considered known_future.",
),
):
"""Command to make forecast with etna without coding.

Expand Down Expand Up @@ -65,11 +75,13 @@ def forecast(
df_timeseries = TSDataset.to_dataset(df_timeseries)

df_exog = None
k_f: Union[Literal["all"], Sequence[Any]] = ()
if exog_path:
df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
df_exog = TSDataset.to_dataset(df_exog)
k_f = "all" if not known_future else known_future

tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)
tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog, known_future=k_f)

pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
pipeline.fit(tsdataset)
Expand Down
64 changes: 64 additions & 0 deletions tests/test_commands/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,70 @@ def base_pipeline_yaml_path():
tmp.close()


@pytest.fixture
def elementary_linear_model_pipeline():
tmp = NamedTemporaryFile("w")
tmp.write(
"""
_target_: etna.pipeline.Pipeline
horizon: 3
model:
_target_: etna.models.LinearPerSegmentModel
"""
)
tmp.flush()
yield Path(tmp.name)
tmp.close()


@pytest.fixture
def elementary_boosting_model_pipeline():
tmp = NamedTemporaryFile("w")
tmp.write(
"""
_target_: etna.pipeline.Pipeline
horizon: 3
model:
_target_: etna.models.CatBoostModelPerSegment
"""
)
tmp.flush()
yield Path(tmp.name)
tmp.close()


@pytest.fixture
def increasing_timeseries_path():
df = pd.DataFrame(
{
"timestamp": list(pd.date_range("2022-06-01", periods=10)),
"target": list(range(10)),
"segment": ["segment_0"] * 10,
}
)
tmp = NamedTemporaryFile("w")
df.to_csv(tmp, index=False)
tmp.flush()
yield Path(tmp.name)
tmp.close()


@pytest.fixture
def increasing_timeseries_exog_path():
df_regressors = pd.DataFrame(
{
"timestamp": list(pd.date_range("2022-06-01", periods=13)),
"regressor_1": list(range(10)) + [3, 3, 3],
"segment": ["segment_0"] * 13,
}
)
tmp = NamedTemporaryFile("w")
df_regressors.to_csv(tmp, index=False)
tmp.flush()
yield Path(tmp.name)
tmp.close()


@pytest.fixture
def base_pipeline_omegaconf_path():
tmp = NamedTemporaryFile("w")
Expand Down
31 changes: 31 additions & 0 deletions tests/test_commands/test_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from tempfile import NamedTemporaryFile

import pandas as pd
import pytest


def test_dummy_run_with_exog(base_pipeline_yaml_path, base_timeseries_path, base_timeseries_exog_path):
Expand Down Expand Up @@ -69,3 +70,33 @@ def test_run_with_predictive_intervals(
df_output = pd.read_csv(tmp_output_path)
for q in [0.025, 0.975]:
assert f"target_{q}" in df_output.columns


@pytest.mark.parametrize(
"model_pipeline",
[
"elementary_linear_model_pipeline",
"elementary_boosting_model_pipeline",
],
)
def test_forecast_use_exog_correct(
model_pipeline, increasing_timeseries_path, increasing_timeseries_exog_path, request
):
tmp_output = NamedTemporaryFile("w")
tmp_output_path = Path(tmp_output.name)
model_pipeline = request.getfixturevalue(model_pipeline)
run(
[
"etna",
"forecast",
str(model_pipeline),
str(increasing_timeseries_path),
"D",
str(tmp_output_path),
str(increasing_timeseries_exog_path),
]
)
df_output = pd.read_csv(tmp_output_path)
pd.testing.assert_series_equal(
df_output["target"], pd.Series(data=[3.0, 3.0, 3.0], name="target"), check_less_precise=1
)