Skip to content

Commit

Permalink
Fix/optimized historical forecast with component specific lags (#2040)
Browse files Browse the repository at this point in the history
* fix: properly call model._get_lags instead of model.lags.get to account for component specific lags

* updated changelog

* add unit tests

---------

Co-authored-by: dennisbader <dennis.bader@gmx.ch>
  • Loading branch information
madtoinou and dennisbader authored Oct 28, 2023
1 parent e6f2208 commit 039d898
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 10 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
[Full Changelog](https://github.com/unit8co/darts/compare/0.26.0...master)

### For users of the library:

**Improved**
- Improvements to `TorchForecastingModel`:
- Added callback `darts.utils.callbacks.TFMProgressBar` to customize at which model stages to display the progress bar. [#2020](https://github.com/unit8co/darts/pull/2020) by [Dennis Bader](https://github.com/dennisbader).
- Improvements to documentation:
- Adapted the example notebooks to properly apply data transformers and avoid look-ahead bias. [#2020](https://github.com/unit8co/darts/pull/2020) by [Samriddhi Singh](https://github.com/SimTheGreat).

**Fixed**
- Fixed a bug when trying to divide `pd.Timedelta` by a `pd.Offset` with an ambiguous conversion to `pd.Timedelta` when using encoders. [#2034](https://github.com/unit8co/darts/pull/2034) by [Antoine Madrona](https://github.com/madtoinou).
- Fixed a bug when calling optimized `historical_forecasts()` for a `RegressionModel` trained with unequal component-specific lags. [#2040](https://github.com/unit8co/darts/pull/2040) by [Antoine Madrona](https://github.com/madtoinou).
- Fixed a bug when using encoders with `RegressionModel` and series with a non-evenly spaced frequency (e.g. Month Begin). This raised an error during lagged data creation when trying to divide a `pd.Timedelta` by the ambiguous frequency. [#2034](https://github.com/unit8co/darts/pull/2034) by [Antoine Madrona](https://github.com/madtoinou).

### For developers of the library:

Expand Down
3 changes: 2 additions & 1 deletion darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ def encode_year(idx):
)

# convert lags arguments to list of int
# lags attribute should always be accessed with self._get_lags(), not self.lags.get()
self.lags, self.component_lags = self._generate_lags(
lags=lags,
lags_past_covariates=lags_past_covariates,
Expand Down Expand Up @@ -373,7 +374,7 @@ def _get_lags(self, lags_type: str):
if lags_type in self.component_lags:
return self.component_lags[lags_type]
else:
return self.lags.get(lags_type)
return self.lags.get(lags_type, None)

@property
def _model_encoder_settings(
Expand Down
51 changes: 51 additions & 0 deletions darts/tests/models/forecasting/test_historical_forecasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,57 @@ def test_optimized_historical_forecasts_regression_with_encoders(self, config):
assert (hfc.time_index == ohfc.time_index).all()
np.testing.assert_array_almost_equal(hfc.all_values(), ohfc.all_values())

def test_optimized_historical_forecasts_regression_with_component_specific_lags(
    self,
):
    """Compare optimized and default historical forecasts for a regression
    model whose past-covariates lags are given per component.

    The model is fitted once, then both code paths are run on the same
    validation series; their time indexes and values must match.
    """
    horizon, lags = 1, 3
    len_val_series = 10
    series_train = self.ts_pass_train[:10]
    series_val = self.ts_pass_val[:len_val_series]

    # component-specific lags: one entry for the encoder-generated component,
    # plus a fallback under "default_lags"
    past_cov_lags = {"default_lags": 2, "darts_enc_pc_dta_dayofweek": 1}
    encoders = {
        "cyclic": {"future": ["month"]},
        "datetime_attribute": {"past": ["dayofweek"]},
    }
    model = LinearRegressionModel(
        lags=lags,
        lags_past_covariates=past_cov_lags,
        lags_future_covariates=[2, 3],
        add_encoders=encoders,
    )
    model.fit(series_train)

    # reference result from the non-optimized routine
    hist_fct = model.historical_forecasts(
        series=series_val,
        retrain=False,
        enable_optimization=False,
    )
    # result from the optimized routine under test
    opti_hist_fct = model._optimized_historical_forecasts(series=[series_val])

    # normalise both results to lists so they can be iterated in lockstep
    if not isinstance(hist_fct, list):
        hist_fct, opti_hist_fct = [hist_fct], [opti_hist_fct]

    n_pred_series_expected = 1
    n_pred_points_expected = len(series_val) - lags - horizon + 1
    first_ts_expected = (
        series_val.time_index[lags] + (horizon - 1) * series_val.freq
    )
    last_ts_expected = series_val.end_time()

    # same number of forecast series from both routines
    assert len(opti_hist_fct) == n_pred_series_expected
    assert len(hist_fct) == len(opti_hist_fct)
    # forecasts span the expected time range
    assert opti_hist_fct[0].start_time() == first_ts_expected
    assert opti_hist_fct[-1].end_time() == last_ts_expected
    # point-wise agreement between the two routines
    for reference, optimized in zip(hist_fct, opti_hist_fct):
        assert len(optimized) == n_pred_points_expected
        assert (reference.time_index == optimized.time_index).all()
        np.testing.assert_array_almost_equal(
            reference.all_values(), optimized.all_values()
        )

@pytest.mark.slow
@pytest.mark.skipif(not TORCH_AVAILABLE, reason="requires torch")
@pytest.mark.parametrize("model_config", models_torch_cls_kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,17 @@ def _optimized_historical_forecasts_regression_last_points_only(

X, times = create_lagged_prediction_data(
target_series=None
if len(model.lags.get("target", [])) == 0
if model._get_lags("target") is None
else series_[hist_fct_tgt_start:hist_fct_tgt_end],
past_covariates=None
if past_covariates_ is None
else past_covariates_[hist_fct_pc_start:hist_fct_pc_end],
future_covariates=None
if future_covariates_ is None
else future_covariates_[hist_fct_fc_start:hist_fct_fc_end],
lags=model.lags.get("target", None),
lags_past_covariates=model.lags.get("past", None),
lags_future_covariates=model.lags.get("future", None),
lags=model._get_lags("target"),
lags_past_covariates=model._get_lags("past"),
lags_future_covariates=model._get_lags("future"),
uses_static_covariates=model.uses_static_covariates,
last_static_covariates_shape=model._static_covariates_shape,
max_samples_per_ts=None,
Expand Down Expand Up @@ -238,17 +238,17 @@ def _optimized_historical_forecasts_regression_all_points(

X, _ = create_lagged_prediction_data(
target_series=None
if len(model.lags.get("target", [])) == 0
if model._get_lags("target") is None
else series_[hist_fct_tgt_start:hist_fct_tgt_end],
past_covariates=None
if past_covariates_ is None
else past_covariates_[hist_fct_pc_start:hist_fct_pc_end],
future_covariates=None
if future_covariates_ is None
else future_covariates_[hist_fct_fc_start:hist_fct_fc_end],
lags=model.lags.get("target", None),
lags_past_covariates=model.lags.get("past", None),
lags_future_covariates=model.lags.get("future", None),
lags=model._get_lags("target"),
lags_past_covariates=model._get_lags("past"),
lags_future_covariates=model._get_lags("future"),
uses_static_covariates=model.uses_static_covariates,
last_static_covariates_shape=model._static_covariates_shape,
max_samples_per_ts=None,
Expand Down

0 comments on commit 039d898

Please sign in to comment.