Skip to content

Commit

Permalink
refactor: move full implementation of sliding_window funcs to benchpots v0.2;
Browse files Browse the repository at this point in the history
  • Loading branch information
WenjieDu committed Jul 2, 2024
1 parent efaeb72 commit 4015193
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 52 deletions.
69 changes: 20 additions & 49 deletions pypots/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import math
from typing import Union

import benchpots
import numpy as np
import torch

from ..utils.logging import logger


def turn_data_into_specified_dtype(
data: Union[np.ndarray, torch.Tensor, list],
Expand Down Expand Up @@ -166,7 +164,11 @@ def parse_delta(
return delta


def sliding_window(time_series, window_len, sliding_len=None):
def sliding_window(
time_series: Union[np.ndarray, torch.Tensor],
window_len: int,
sliding_len: int = None,
) -> Union[np.ndarray, torch.Tensor]:
"""Generate time series samples with sliding window method, truncating windows from time-series data
with a given sequence length.
Expand All @@ -177,41 +179,27 @@ def sliding_window(time_series, window_len, sliding_len=None):
Parameters
----------
time_series : np.ndarray,
time_series :
time series data, len(shape)=2, [total_length, feature_num]
window_len : int,
window_len :
The length of the sliding window, i.e. the number of time steps in the generated data samples.
sliding_len : int, default = None,
sliding_len :
The sliding length of the window for each moving step. It will be set to window_len if None.
Returns
-------
samples : np.ndarray,
samples :
The generated time-series data samples of shape [seq_len//sliding_len, n_steps, n_features].
"""
sliding_len = window_len if sliding_len is None else sliding_len
total_len = time_series.shape[0]
start_indices = np.asarray(range(total_len // sliding_len)) * sliding_len

# remove the last one if left length is not enough
if total_len - start_indices[-1] < window_len:
to_drop = math.ceil(window_len / sliding_len)
left_len = total_len - start_indices[-1]
start_indices = start_indices[:-to_drop]
logger.warning(
f"The last {to_drop} samples are dropped due to the left length {left_len} is not enough."
)

sample_collector = []
for idx in start_indices:
sample_collector.append(time_series[idx : idx + window_len])

samples = np.asarray(sample_collector).astype("float32")

return samples
return benchpots.utils.sliding_window(
time_series,
window_len,
sliding_len,
)


def inverse_sliding_window(X, sliding_len):
Expand All @@ -238,25 +226,8 @@ def inverse_sliding_window(X, sliding_len):
The restored time-series data with shape of [total_length, n_features].
"""
assert len(X.shape) == 3, f"X should be a 3D array, but got {X.shape}"
n_samples, window_size, n_features = X.shape

if sliding_len >= window_size:
if sliding_len > window_size:
logger.warning(
f"sliding_len {sliding_len} is larger than the window size {window_size}, "
f"hence there will be gaps between restored data."
)
restored_data = X.reshape(n_samples * window_size, n_features)
else:
collector = [X[0][:sliding_len]]
overlap = X[0][sliding_len:]
for x in X[1:]:
overlap_avg = (overlap + x[:-sliding_len]) / 2
collector.append(overlap_avg[:sliding_len])
overlap = np.concatenate(
[overlap_avg[sliding_len:], x[-sliding_len:]], axis=0
)
collector.append(overlap)
restored_data = np.concatenate(collector, axis=0)
return restored_data

return benchpots.utils.inverse_sliding_window(
X,
sliding_len,
)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ scikit-learn
torch>=1.10.0
tsdb>=0.4
pygrinder>=0.6
benchpots>=0.1
benchpots>=0.2
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ basic =
torch>=1.10.0
tsdb>=0.4
pygrinder>=0.6
benchpots>=0.1
benchpots>=0.2

# dependencies that are optional, torch-geometric are only needed for model Raindrop
# but its installation takes too much time
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
"torch>=1.10.0",
"tsdb>=0.4",
"pygrinder>=0.6",
"benchpots>=0.1",
"benchpots>=0.2",
],
python_requires=">=3.8.0",
setup_requires=["setuptools>=38.6.0"],
Expand Down

0 comments on commit 4015193

Please sign in to comment.