tinkoff-ai · iKintosh · Jun 21, 2022 · Jun 17, 2022 · Jun 17, 2022 · Jun 17, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,7 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - 
 ### Fixed
-- 
+- Add `known_future` parameter to CLI ([#758](https://github.com/tinkoff-ai/etna/pull/758))
 - 
 - 
 - 

diff --git a/etna/commands/backtest_command.py b/etna/commands/backtest_command.py
@@ -1,7 +1,11 @@
 from pathlib import Path
 from typing import Any
 from typing import Dict
+from typing import List
+from typing import Literal
 from typing import Optional
+from typing import Sequence
+from typing import Union
 
 import hydra_slayer
 import pandas as pd
@@ -19,6 +23,12 @@ def backtest(
     freq: str = typer.Argument(..., help="frequency of timestamp in files in pandas format"),
     output_path: Path = typer.Argument(..., help="where to save forecast"),
     exog_path: Optional[Path] = typer.Argument(default=None, help="path to csv with exog data"),
+    known_future: Optional[List[str]] = typer.Argument(
+        None,
+        help="list of all known_future columns (regressor "
+        "columns). If not specified then all exog_columns "
+        "considered known_future.",
+    ),
 ):
     """Command to run backtest with etna without coding.
 
@@ -60,11 +70,13 @@ def backtest(
     df_timeseries = TSDataset.to_dataset(df_timeseries)
 
     df_exog = None
+    k_f: Union[Literal["all"], Sequence[Any]] = ()
     if exog_path:
         df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
         df_exog = TSDataset.to_dataset(df_exog)
+        k_f = "all" if not known_future else known_future
 
-    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)
+    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog, known_future=k_f)
 
     pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
     backtest_configs_hydra_slayer: Dict[str, Any] = hydra_slayer.get_from_params(**backtest_configs)

diff --git a/etna/commands/forecast_command.py b/etna/commands/forecast_command.py
@@ -1,7 +1,11 @@
 from pathlib import Path
 from typing import Any
 from typing import Dict
+from typing import List
+from typing import Literal
 from typing import Optional
+from typing import Sequence
+from typing import Union
 
 import hydra_slayer
 import pandas as pd
@@ -20,6 +24,12 @@ def forecast(
     exog_path: Optional[Path] = typer.Argument(None, help="path to csv with exog data"),
     forecast_config_path: Optional[Path] = typer.Argument(None, help="path to yaml config with forecast params"),
     raw_output: bool = typer.Argument(False, help="by default we return only forecast without features"),
+    known_future: Optional[List[str]] = typer.Argument(
+        None,
+        help="list of all known_future columns (regressor "
+        "columns). If not specified then all exog_columns "
+        "considered known_future.",
+    ),
 ):
     """Command to make forecast with etna without coding.
 
@@ -65,11 +75,13 @@ def forecast(
     df_timeseries = TSDataset.to_dataset(df_timeseries)
 
     df_exog = None
+    k_f: Union[Literal["all"], Sequence[Any]] = ()
     if exog_path:
         df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
         df_exog = TSDataset.to_dataset(df_exog)
+        k_f = "all" if not known_future else known_future
 
-    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)
+    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog, known_future=k_f)
 
     pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
     pipeline.fit(tsdataset)

diff --git a/tests/test_commands/conftest.py b/tests/test_commands/conftest.py
@@ -28,6 +28,70 @@ def base_pipeline_yaml_path():
     tmp.close()
 
 
+@pytest.fixture
+def elementary_linear_model_pipeline():
+    tmp = NamedTemporaryFile("w")
+    tmp.write(
+        """
+        _target_: etna.pipeline.Pipeline
+        horizon: 3
+        model:
+          _target_: etna.models.LinearPerSegmentModel
+        """
+    )
+    tmp.flush()
+    yield Path(tmp.name)
+    tmp.close()
+
+
+@pytest.fixture
+def elementary_boosting_model_pipeline():
+    tmp = NamedTemporaryFile("w")
+    tmp.write(
+        """
+        _target_: etna.pipeline.Pipeline
+        horizon: 3
+        model:
+          _target_: etna.models.CatBoostModelPerSegment
+        """
+    )
+    tmp.flush()
+    yield Path(tmp.name)
+    tmp.close()
+
+
+@pytest.fixture
+def increasing_timeseries_path():
+    df = pd.DataFrame(
+        {
+            "timestamp": list(pd.date_range("2022-06-01", periods=10)),
+            "target": list(range(10)),
+            "segment": ["segment_0"] * 10,
+        }
+    )
+    tmp = NamedTemporaryFile("w")
+    df.to_csv(tmp, index=False)
+    tmp.flush()
+    yield Path(tmp.name)
+    tmp.close()
+
+
+@pytest.fixture
+def increasing_timeseries_exog_path():
+    df_regressors = pd.DataFrame(
+        {
+            "timestamp": list(pd.date_range("2022-06-01", periods=13)),
+            "regressor_1": list(range(10)) + [3, 3, 3],
+            "segment": ["segment_0"] * 13,
+        }
+    )
+    tmp = NamedTemporaryFile("w")
+    df_regressors.to_csv(tmp, index=False)
+    tmp.flush()
+    yield Path(tmp.name)
+    tmp.close()
+
+
 @pytest.fixture
 def base_pipeline_omegaconf_path():
     tmp = NamedTemporaryFile("w")

diff --git a/tests/test_commands/test_forecast.py b/tests/test_commands/test_forecast.py
@@ -3,6 +3,7 @@
 from tempfile import NamedTemporaryFile
 
 import pandas as pd
+import pytest
 
 
 def test_dummy_run_with_exog(base_pipeline_yaml_path, base_timeseries_path, base_timeseries_exog_path):
@@ -69,3 +70,33 @@ def test_run_with_predictive_intervals(
     df_output = pd.read_csv(tmp_output_path)
     for q in [0.025, 0.975]:
         assert f"target_{q}" in df_output.columns
+
+
+@pytest.mark.parametrize(
+    "model_pipeline",
+    [
+        "elementary_linear_model_pipeline",
+        "elementary_boosting_model_pipeline",
+    ],
+)
+def test_forecast_use_exog_correct(
+    model_pipeline, increasing_timeseries_path, increasing_timeseries_exog_path, request
+):
+    tmp_output = NamedTemporaryFile("w")
+    tmp_output_path = Path(tmp_output.name)
+    model_pipeline = request.getfixturevalue(model_pipeline)
+    run(
+        [
+            "etna",
+            "forecast",
+            str(model_pipeline),
+            str(increasing_timeseries_path),
+            "D",
+            str(tmp_output_path),
+            str(increasing_timeseries_exog_path),
+        ]
+    )
+    df_output = pd.read_csv(tmp_output_path)
+    pd.testing.assert_series_equal(
+        df_output["target"], pd.Series(data=[3.0, 3.0, 3.0], name="target"), check_less_precise=1
+    )