From 4015193fc6d7d5c330b624590518025c993869f0 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 3 Jul 2024 00:27:05 +0800 Subject: [PATCH] refactor: move full implementation of sliding_window funcs to benchpots v0.2; --- pypots/data/utils.py | 69 +++++++++++++------------------------------- requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 23 insertions(+), 52 deletions(-) diff --git a/pypots/data/utils.py b/pypots/data/utils.py index f6bc939b..7762ff7f 100644 --- a/pypots/data/utils.py +++ b/pypots/data/utils.py @@ -5,14 +5,12 @@ # Created by Wenjie Du # License: BSD-3-Clause -import math from typing import Union +import benchpots import numpy as np import torch -from ..utils.logging import logger - def turn_data_into_specified_dtype( data: Union[np.ndarray, torch.Tensor, list], @@ -166,7 +164,11 @@ def parse_delta( return delta -def sliding_window(time_series, window_len, sliding_len=None): +def sliding_window( + time_series: Union[np.ndarray, torch.Tensor], + window_len: int, + sliding_len: int = None, +) -> Union[np.ndarray, torch.Tensor]: """Generate time series samples with sliding window method, truncating windows from time-series data with a given sequence length. @@ -177,41 +179,27 @@ def sliding_window(time_series, window_len, sliding_len=None): Parameters ---------- - time_series : np.ndarray, + time_series : time series data, len(shape)=2, [total_length, feature_num] - window_len : int, + window_len : The length of the sliding window, i.e. the number of time steps in the generated data samples. - sliding_len : int, default = None, + sliding_len : The sliding length of the window for each moving step. It will be set as the same with n_steps if None. Returns ------- - samples : np.ndarray, + samples : The generated time-series data samples of shape [seq_len//sliding_len, n_steps, n_features]. """ - sliding_len = window_len if sliding_len is None else sliding_len - total_len = time_series.shape[0] - start_indices = np.asarray(range(total_len // sliding_len)) * sliding_len - - # remove the last one if left length is not enough - if total_len - start_indices[-1] < window_len: - to_drop = math.ceil(window_len / sliding_len) - left_len = total_len - start_indices[-1] - start_indices = start_indices[:-to_drop] - logger.warning( - f"The last {to_drop} samples are dropped due to the left length {left_len} is not enough." - ) - - sample_collector = [] - for idx in start_indices: - sample_collector.append(time_series[idx : idx + window_len]) - samples = np.asarray(sample_collector).astype("float32") - - return samples + return benchpots.utils.sliding_window( + time_series, + window_len, + sliding_len, + ) def inverse_sliding_window(X, sliding_len): @@ -238,25 +226,8 @@ def inverse_sliding_window(X, sliding_len): The restored time-series data with shape of [total_length, n_features]. """ - assert len(X.shape) == 3, f"X should be a 3D array, but got {X.shape}" - n_samples, window_size, n_features = X.shape - - if sliding_len >= window_size: - if sliding_len > window_size: - logger.warning( - f"sliding_len {sliding_len} is larger than the window size {window_size}, " - f"hence there will be gaps between restored data." - ) - restored_data = X.reshape(n_samples * window_size, n_features) - else: - collector = [X[0][:sliding_len]] - overlap = X[0][sliding_len:] - for x in X[1:]: - overlap_avg = (overlap + x[:-sliding_len]) / 2 - collector.append(overlap_avg[:sliding_len]) - overlap = np.concatenate( - [overlap_avg[sliding_len:], x[-sliding_len:]], axis=0 - ) - collector.append(overlap) - restored_data = np.concatenate(collector, axis=0) - return restored_data + + return benchpots.utils.inverse_sliding_window( + X, + sliding_len, + ) diff --git a/requirements.txt b/requirements.txt index 63397d47..6f4d8112 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ scikit-learn torch>=1.10.0 tsdb>=0.4 pygrinder>=0.6 -benchpots>=0.1 +benchpots>=0.2 diff --git a/setup.cfg b/setup.cfg index f25dd640..b027a9db 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ basic = torch>=1.10.0 tsdb>=0.4 pygrinder>=0.6 - benchpots>=0.1 + benchpots>=0.2 # dependencies that are optional, torch-geometric are only needed for model Raindrop # but its installation takes too much time diff --git a/setup.py b/setup.py index 13172658..37f3be90 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ "torch>=1.10.0", "tsdb>=0.4", "pygrinder>=0.6", - "benchpots>=0.1", + "benchpots>=0.2", ], python_requires=">=3.8.0", setup_requires=["setuptools>=38.6.0"],