From abd3c6ccb13ad705c61fd7926706aa46736a62ed Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Sat, 26 Nov 2022 00:44:42 +0800 Subject: [PATCH 1/2] fix: the dual-processing bug in unified_data_for_test/gene_physionet2012(); --- pypots/data/load_specific_datasets.py | 2 +- pypots/tests/unified_data_for_test.py | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/pypots/data/load_specific_datasets.py b/pypots/data/load_specific_datasets.py index 7c5352e3..9000ad92 100644 --- a/pypots/data/load_specific_datasets.py +++ b/pypots/data/load_specific_datasets.py @@ -43,7 +43,7 @@ def preprocess_physionet2012(data): def apply_func(df_temp): # pad and truncate to set the max length of samples as 48 missing = list(set(range(0, 48)).difference(set(df_temp["Time"]))) missing_part = pd.DataFrame({"Time": missing}) - df_temp = df_temp.append(missing_part, ignore_index=False, sort=False) # pad + df_temp = pd.concat([df_temp, missing_part], ignore_index=False, sort=False) # pad df_temp = df_temp.set_index("Time").sort_index().reset_index() df_temp = df_temp.iloc[:48] # truncate return df_temp diff --git a/pypots/tests/unified_data_for_test.py b/pypots/tests/unified_data_for_test.py index 2bdf89fc..ffb0f395 100644 --- a/pypots/tests/unified_data_for_test.py +++ b/pypots/tests/unified_data_for_test.py @@ -5,7 +5,6 @@ # Created by Wenjie Du # License: GLP-v3 -import pandas as pd import torch from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler @@ -75,21 +74,6 @@ def gene_physionet2012(): # generate samples df = load_specific_dataset("physionet_2012") X = df["X"] - X = X.drop(df["static_features"], axis=1) - - def apply_func(df_temp): - missing = list(set(range(0, 48)).difference(set(df_temp["Time"]))) - missing_part = pd.DataFrame({"Time": missing}) - df_temp = df_temp.append(missing_part, ignore_index=False, sort=False) - df_temp = df_temp.set_index("Time").sort_index().reset_index() - df_temp = 
df_temp.iloc[:48] - return df_temp - - X = X.groupby("RecordID").apply(apply_func) - X = X.drop("RecordID", axis=1) - X = X.reset_index() - X = X.drop(["level_1", "Time"], axis=1) - y = df["y"] all_recordID = X["RecordID"].unique() train_set_ids, test_set_ids = train_test_split(all_recordID, test_size=0.2) From a7c9c12aef4e78ff3f93f465baa8b53f756d3494 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Mon, 28 Nov 2022 10:37:08 +0800 Subject: [PATCH 2/2] doc: update README to add the link of PyPOTS workspace on Slack; --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ec51b752..010333a7 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ - + @@ -36,6 +36,10 @@ + + + +

⦿ `Motivation`: Due to all kinds of reasons like failure of collection sensors, communication error, and unexpected malfunction, missing values are common to see in time series from the real-world environment. This makes partially-observed time series (POTS) a pervasive problem in open-world modeling and prevents advanced data analysis. Although this problem is important, the area of data mining on POTS still lacks a dedicated toolkit. PyPOTS is created to fill in this blank. @@ -85,7 +89,7 @@ or ## ❖ Attention 👀 The documentation and tutorials are under construction. And a short paper introducing PyPOTS is on the way! 🚀 Stay tuned please! -‼️ PyPOTS is currently under developing. If you like it and look forward to its growth, please give PyPOTS a star and watch it to keep you posted on its progress and to let me know that its development is meaningful. If you have any feedback, or want to contribute ideas/suggestions or share time-series related algorithms/papers, please join PyPOTS community and , or create an issue. +‼️ PyPOTS is currently under development. If you like it and look forward to its growth, please give PyPOTS a star and watch it to keep you posted on its progress and to let me know that its development is meaningful. If you have any feedback, or want to contribute ideas/suggestions or share time-series related algorithms/papers, please join the PyPOTS community and chat on Slack, or create an issue. If you have any additional questions or have interests in collaboration, please take a look at [my GitHub profile](https://github.com/WenjieDu) and feel free to contact me 😃. Thank you all for your attention! 😃