Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/improve integer indexes #1191

Merged
merged 35 commits into master from feat/improve-integer-indexes
Sep 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6a04810
solve an issue in get_index_at_point()
hrzn Sep 2, 2022
494ac95
correct an issue when backtesting with stride
hrzn Sep 2, 2022
dde110f
Improve a few things with integer indexing
hrzn Sep 2, 2022
b3b7f3a
fix get_index_at_point with step>1
hrzn Sep 5, 2022
2cdd868
handle step>1 cases in time series generation
hrzn Sep 5, 2022
d2b869c
some corrections to integer indexing
hrzn Sep 6, 2022
b63a2ba
Change default freq in time series generation functions
hrzn Sep 6, 2022
fee071e
Make index generation more general
hrzn Sep 6, 2022
87f5819
Correct another case in TimeSeries
hrzn Sep 6, 2022
217bafd
Fix gaps() for integer series
hrzn Sep 6, 2022
1e1d479
correct regression models
hrzn Sep 6, 2022
d6f3d84
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 6, 2022
82e809f
fix an issue in slice()
hrzn Sep 7, 2022
323ed44
catch another case in slice()
hrzn Sep 7, 2022
de2516d
small improvement in regression models
hrzn Sep 7, 2022
88db97c
Fix a small issue in timeseries tests
hrzn Sep 7, 2022
c2bb455
add testing of integer series with RegressionModel
hrzn Sep 7, 2022
356673e
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 7, 2022
e3f9122
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 12, 2022
08bb701
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 15, 2022
5385968
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 19, 2022
4c86ce6
fix frequency in historical forecasts
hrzn Sep 19, 2022
9b9c187
Update darts/tests/test_timeseries.py
hrzn Sep 19, 2022
3c60fa3
Update darts/tests/test_timeseries.py
hrzn Sep 19, 2022
03af5f7
Update darts/tests/test_timeseries.py
hrzn Sep 19, 2022
bb64ea0
Update darts/tests/utils/test_timeseries_generation.py
hrzn Sep 19, 2022
182ac16
Update darts/tests/utils/test_timeseries_generation.py
hrzn Sep 19, 2022
317ffe2
make generate_index() public
hrzn Sep 19, 2022
d405035
Merge branch 'feat/improve-integer-indexes' of github.com:unit8co/dar…
hrzn Sep 19, 2022
1315182
Test more freqs with datetimeindex
hrzn Sep 19, 2022
038f09a
Update darts/timeseries.py
hrzn Sep 19, 2022
4dd8cfc
please black
hrzn Sep 19, 2022
98a21b5
fix generation tests
hrzn Sep 19, 2022
3d105d6
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 19, 2022
4de6f66
Merge branch 'master' into feat/improve-integer-indexes
hrzn Sep 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion darts/models/forecasting/forecasting_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def historical_forecasts(
pd.RangeIndex(
start=last_points_times[0],
stop=last_points_times[-1] + 1,
step=1,
step=series.freq * stride,
),
np.array(last_points_values),
columns=series.columns,
Expand Down
22 changes: 12 additions & 10 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,22 +636,24 @@ def predict(
f"but it ranges only from {cov.start_time()} until {cov.end_time()}.",
)

if cov.has_datetime_index:
covariate_matrices[cov_type].append(
cov[first_req_ts:last_req_ts].values()
)
else:
# include last_req_ts when slicing series with integer indices
covariate_matrices[cov_type].append(
cov[first_req_ts : last_req_ts + 1].values()
)
# Note: we use slice() rather than the [] operator because
# for integer-indexed series [] does not act on the time index.
last_req_ts = (
# For range indexes, we need to make the end timestamp inclusive here
last_req_ts + ts.freq
if ts.has_range_index
else last_req_ts
)
covariate_matrices[cov_type].append(
cov.slice(first_req_ts, last_req_ts).values(copy=False)
)

covariate_matrices[cov_type] = np.stack(covariate_matrices[cov_type])

series_matrix = None
if "target" in self.lags:
series_matrix = np.stack(
[ts[self.lags["target"][0] :].values() for ts in series]
[ts[self.lags["target"][0] :].values(copy=False) for ts in series]
)

# repeat series_matrix to shape (num_samples * num_series, n_lags, n_components)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
# pd.DatetimeIndex
# target covariate for inference dataset for n <= output_chunk_length
cov_time_inf_short = TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=target_time.start_time(),
length=n_target + n_short,
freq=target_time.freq,
Expand All @@ -43,7 +43,7 @@ class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
)
# target covariate for inference dataset for n > output_chunk_length
cov_time_inf_long = TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=target_time.start_time(),
length=n_target + n_long,
freq=target_time.freq,
Expand All @@ -54,7 +54,7 @@ class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
# integer index
# target covariate for inference dataset for n <= output_chunk_length
cov_int_inf_short = TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=target_int.start_time(),
length=n_target + n_short,
freq=target_int.freq,
Expand All @@ -63,7 +63,7 @@ class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
)
# target covariate for inference dataset for n > output_chunk_length
cov_int_inf_long = TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=target_int.start_time(),
length=n_target + n_long,
freq=target_int.freq,
Expand Down
12 changes: 6 additions & 6 deletions darts/tests/models/forecasting/test_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class EncoderTestCase(DartsBaseTestClass):
# multi-TS at prediction should be as follows
inf_ts_short_future = [
TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=ts.end_time() + (1 - 12) * ts.freq, length=12 + 6, freq=ts.freq
),
np.arange(12 + 6),
Expand All @@ -66,7 +66,7 @@ class EncoderTestCase(DartsBaseTestClass):

inf_ts_long_future = [
TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=ts.end_time() + (1 - 12) * ts.freq, length=12 + 8, freq=ts.freq
),
np.arange(12 + 8),
Expand All @@ -76,7 +76,7 @@ class EncoderTestCase(DartsBaseTestClass):

inf_ts_short_past = [
TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=ts.end_time() + (1 - 12) * ts.freq, length=12, freq=ts.freq
),
np.arange(12),
Expand All @@ -86,7 +86,7 @@ class EncoderTestCase(DartsBaseTestClass):

inf_ts_long_past = [
TimeSeries.from_times_and_values(
tg._generate_index(
tg.generate_index(
start=ts.end_time() + (1 - 12) * ts.freq,
length=12 + (8 - 6),
freq=ts.freq,
Expand Down Expand Up @@ -298,7 +298,7 @@ def test_cyclic_encoder(self):
attribute = "month"

month_series = TimeSeries.from_times_and_values(
times=tg._generate_index(
times=tg.generate_index(
start=pd.to_datetime("2000-01-01"), length=24, freq="MS"
),
values=np.arange(24),
Expand Down Expand Up @@ -338,7 +338,7 @@ def test_datetime_attribute_encoder(self):
attribute = "month"

month_series = TimeSeries.from_times_and_values(
times=tg._generate_index(
times=tg.generate_index(
start=pd.to_datetime("2000-01-01"), length=24, freq="MS"
),
values=np.arange(24),
Expand Down
27 changes: 25 additions & 2 deletions darts/tests/models/forecasting/test_local_forecasting_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,11 @@ def test_exogenous_variables_support(self):

# test case with numerical pd.RangeIndex
target_num_idx = TimeSeries.from_times_and_values(
times=tg._generate_index(start=0, length=len(self.ts_gaussian)),
times=tg.generate_index(start=0, length=len(self.ts_gaussian)),
values=self.ts_gaussian.all_values(copy=False),
)
fc_num_idx = TimeSeries.from_times_and_values(
times=tg._generate_index(start=0, length=len(self.ts_gaussian_long)),
times=tg.generate_index(start=0, length=len(self.ts_gaussian_long)),
values=self.ts_gaussian_long.all_values(copy=False),
)

Expand Down Expand Up @@ -270,6 +270,29 @@ def test_dummy_series(self):
with self.assertRaises(ValueError):
autoarima.fit(series=ts)

def test_forecast_time_index(self):
    """Forecasts must continue the time index of the series used for training.

    Covers an integer-indexed series whose step is larger than 1, and a
    datetime-indexed series.
    """
    horizon = 5

    # Integer-indexed series with step 2: 20 points at 10, 12, ..., 48.
    int_values = np.random.rand(20)
    int_index = pd.RangeIndex(start=10, stop=50, step=2)
    int_series = TimeSeries.from_times_and_values(int_index, int_values)

    int_model = NaiveSeasonal(K=1)
    int_model.fit(int_series)
    int_forecast = int_model.predict(n=horizon)

    # The forecast index is expected to keep the step of 2: 50, 52, ..., 58.
    expected_index = pd.RangeIndex(start=50, stop=60, step=2)
    index_matches = int_forecast.time_index == expected_index
    self.assertTrue(all(index_matches))

    # Datetime-indexed series: 20 constant points starting on 2013-01-01.
    dt_series = tg.constant_timeseries(
        start=pd.Timestamp("20130101"), length=20, value=1
    )
    dt_model = NaiveSeasonal(K=1)
    dt_model.fit(dt_series)
    dt_forecast = dt_model.predict(n=horizon)

    # The 5 forecasted points are expected to span 2013-01-21 to 2013-01-25.
    self.assertEqual(dt_forecast.start_time(), pd.Timestamp("20130121"))
    self.assertEqual(dt_forecast.end_time(), pd.Timestamp("20130125"))

def test_statsmodels_dual_models(self):

# same tests, but VARIMA requires to work on a multivariate target series
Expand Down
34 changes: 34 additions & 0 deletions darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,40 @@ def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch):
assert lgb_fit_patch.call_args[1]["eval_set"] is not None
assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2

def test_integer_indexed_series(self):
    """Regression forecasts must not depend on how the integer index is laid out.

    The same values are wrapped in two different ``pd.RangeIndex`` time axes
    (one starting at 0 with step 1, one starting at 10 with step 2); a model
    is fitted on each and the forecasts are compared.
    """
    values_target = np.random.rand(30)
    values_past_cov = np.random.rand(30)
    values_future_cov = np.random.rand(30)

    def forecast_with_index(index):
        # Build target and covariates sharing the given integer time axis.
        target = TimeSeries.from_times_and_values(index, values_target)
        past_cov = TimeSeries.from_times_and_values(index, values_past_cov)
        future_cov = TimeSeries.from_times_and_values(index, values_future_cov)

        # Train on the first 20 points; the remainder is sliced but unused.
        train, _ = target[:20], target[20:]

        regressor = LinearRegressionModel(
            lags=[-2, -1],
            lags_past_covariates=[-2, -1],
            lags_future_covariates=[0],
        )
        regressor.fit(
            series=train,
            past_covariates=past_cov,
            future_covariates=future_cov,
        )
        return regressor.predict(n=10)

    unit_step_index = pd.RangeIndex(start=0, stop=30, step=1)
    offset_step_index = pd.RangeIndex(start=10, stop=70, step=2)

    preds = [
        forecast_with_index(index)
        for index in (unit_step_index, offset_step_index)
    ]
    first_pred, second_pred = preds

    # the predicted values should not depend on the time axis
    np.testing.assert_equal(first_pred.values(), second_pred.values())

    # the time axis returned by the second model should be as expected
    expected_index = pd.RangeIndex(start=50, stop=70, step=2)
    self.assertTrue(all(second_pred.time_index == expected_index))

def test_encoders(self):
max_past_lag = -4
max_future_lag = 4
Expand Down
89 changes: 89 additions & 0 deletions darts/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,36 @@ def test_integer_indexing(self):
list(indexed_ts.time_index) == list(pd.RangeIndex(2, 7, step=1))
)

# check integer indexing features when series index does not start at 0
values = np.random.random(100)
times = pd.RangeIndex(10, 110)
series: TimeSeries = TimeSeries.from_times_and_values(times, values)

# getting index for idx should return i s.t., series[i].time == idx
self.assertEqual(series.get_index_at_point(101), 91)

# check integer indexing features when series index starts at 0 with a step > 1
values = np.random.random(100)
times = pd.RangeIndex(0, 200, step=2)
series: TimeSeries = TimeSeries.from_times_and_values(times, values)

# getting index for idx should return i s.t., series[i].time == idx
self.assertEqual(series.get_index_at_point(100), 50)

# slicing should act the same irrespective of the initial time stamp
np.testing.assert_equal(series[10:20].values().flatten(), values[10:20])

# drop_after should act on the timestamp
np.testing.assert_equal(series.drop_after(20).values().flatten(), values[:10])

# test get_index_at_point on series which does not start at 0 and with a step > 1
values = np.random.random(10)
times = pd.RangeIndex(10, 30, step=2)
series: TimeSeries = TimeSeries.from_times_and_values(times, values)

# getting index for idx should return i s.t., series[i].time == idx
self.assertEqual(series.get_index_at_point(16), 3)

def test_univariate_component(self):
series = TimeSeries.from_values(np.array([10, 20, 30])).with_columns_renamed(
"0", "component"
Expand Down Expand Up @@ -233,6 +263,36 @@ def helper_test_slice(test_case, test_series: TimeSeries):
test_case.assertEqual(seriesC.start_time(), pd.Timestamp("20130108"))
test_case.assertEqual(seriesC.end_time(), pd.Timestamp("20130110"))

# integer-indexed series, starting at 0
values = np.random.rand(30)
idx = pd.RangeIndex(start=0, stop=30, step=1)
ts = TimeSeries.from_times_and_values(idx, values)
slice_vals = ts.slice(10, 20).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[10:20])

# integer-indexed series, not starting at 0
values = np.random.rand(30)
idx = pd.RangeIndex(start=5, stop=35, step=1)
ts = TimeSeries.from_times_and_values(idx, values)
slice_vals = ts.slice(10, 20).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[5:15])

# integer-indexed series, starting at 0, with step > 1
values = np.random.rand(30)
idx = pd.RangeIndex(start=0, stop=60, step=2)
ts = TimeSeries.from_times_and_values(idx, values)
slice_vals = ts.slice(10, 20).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[5:10])

# integer-indexed series, not starting at 0, with step > 1
values = np.random.rand(30)
idx = pd.RangeIndex(start=5, stop=65, step=2)
ts = TimeSeries.from_times_and_values(idx, values)
slice_vals = ts.slice(11, 21).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[3:8])

# test cases where start and/or stop are not in the series

# n points, base case
seriesD = test_series.slice_n_points_after(pd.Timestamp("20130102"), n=3)
test_case.assertEqual(seriesD.start_time(), pd.Timestamp("20130102"))
Expand All @@ -256,6 +316,29 @@ def helper_test_slice(test_case, test_series: TimeSeries):
test_case.assertEqual(seriesG.start_time(), pd.Timestamp("20130101"))
test_case.assertEqual(seriesG.end_time(), pd.Timestamp("20130107"))

# integer indexed series, step = 1, timestamps not in series
values = np.random.rand(30)
idx = pd.RangeIndex(start=0, stop=30, step=1)
ts = TimeSeries.from_times_and_values(idx, values)
# end timestamp further off, slice should be inclusive of last timestamp:
slice_vals = ts.slice(10, 30).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[10:])
slice_vals = ts.slice(10, 32).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[10:])

# an end timestamp within the series makes the slice exclusive of it:
slice_vals = ts.slice(10, 29).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[10:29])

# integer indexed series, step > 1, timestamps not in series
idx = pd.RangeIndex(start=0, stop=60, step=2)
ts = TimeSeries.from_times_and_values(idx, values)
slice_vals = ts.slice(11, 31).values(copy=False).flatten()
np.testing.assert_equal(slice_vals, values[5:15])

slice_ts = ts.slice(40, 60)
test_case.assertEqual(ts.end_time(), slice_ts.end_time())

@staticmethod
def helper_test_split(test_case, test_series: TimeSeries):
seriesA, seriesB = test_series.split_after(pd.Timestamp("20130104"))
Expand Down Expand Up @@ -1029,6 +1112,12 @@ def test_gaps(self):
).all()
)

# test gaps detection on integer-indexed series
values = np.array([1, 2, np.nan, np.nan, 3, 4, np.nan, 6])
times = pd.RangeIndex(8)
ts = TimeSeries.from_times_and_values(times, values)
np.testing.assert_equal(ts.gaps().values, np.array([[2, 3, 2], [6, 6, 1]]))

def test_longest_contiguous_slice(self):
times = pd.date_range("20130101", "20130111")
pd_series1 = pd.Series(
Expand Down
4 changes: 2 additions & 2 deletions darts/tests/test_timeseries_static_covariates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from darts.dataprocessing.transformers import BoxCox, Scaler
from darts.tests.base_test_class import DartsBaseTestClass
from darts.timeseries import DEFAULT_GLOBAL_STATIC_COV_NAME, STATIC_COV_TAG
from darts.utils.timeseries_generation import _generate_index, linear_timeseries
from darts.utils.timeseries_generation import generate_index, linear_timeseries


class TimeSeriesStaticCovariateTestCase(DartsBaseTestClass):
Expand All @@ -25,7 +25,7 @@ def setUpClass(cls):
pd.concat(
[
pd.DataFrame(
_generate_index(start=pd.Timestamp(2010, 1, 1), length=len_ts)
generate_index(start=pd.Timestamp(2010, 1, 1), length=len_ts)
)
]
* n_groups,
Expand Down
Loading