Merge pull request #28 from WenjieDu/dev

Merge dev into main
WenjieDu · Dec 20, 2022 · 0d2f36a · 0d2f36a
2 parents f947a5e + c9955a2
commit 0d2f36a
Show file tree

Hide file tree

Showing 9 changed files with 92 additions and 37 deletions.
diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
@@ -1,6 +1,11 @@
 name: Autoreply to Issues Opened for PyPOTS
 
-on: [issues]
+on:
+  issues:
+    types: [opened]
+  pull_request:
+    branches: [main]
+    types: [opened]
 
 jobs:
   greeting:
@@ -11,4 +16,25 @@ jobs:
     - uses: actions/first-interaction@v1
       with:
         repo-token: ${{ secrets.ACCESS_TOKEN }}
-        issue-message: "Hi there,<br><br>Thank you so much for your attention to PyPOTS! If you find PyPOTS helpful to your work, please star⭐️ this repository. Your star is your recognition, which can help more people notice PyPOTS and grow PyPOTS community. It matters and is definitely a kind of contribution.<br><br>I have received your message and will respond ASAP. Thank you for your patience! 😃<br><br>Best,<br>Wenjie"
+        issue-message: |
+          Hi there 👋,
+          
+          Thank you so much for your attention to PyPOTS! If you find PyPOTS helpful to your work, please star⭐️ this repository. Your star is your recognition, which can help more people notice PyPOTS and grow PyPOTS community. It matters and is definitely a kind of contribution to the community.
+          
+          I have received your message and will respond ASAP. Thank you for your patience! 😃
+          
+          Best,
+          Wenjie
+
+        pr-message: |
+          Hi there 👋,
+          
+          Thank you for your contribution to PyPOTS!
+          
+          If you are trying to fix a bug, please reference the issue number in the description or give your details about the bug. 
+          If you are implementing a feature request, please check with the maintainers that the feature will be accepted first.
+          
+          Best,
+          Wenjie
+          
+          
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -44,9 +44,9 @@ jobs:
               run: |
                   # run tests separately here due to Segmentation Fault in test_clustering when run all in 
                   # one command with `pytest` on MacOS. Bugs not catched, so this is a trade-off to avoid SF.
-                  python -m pytest pypots/tests/test_clustering.py -n auto --cov=pypots 
-                  python -m pytest pypots/tests/test_classification.py -n auto --cov=pypots --cov-append 
-                  python -m pytest pypots/tests/test_imputation.py -n auto --cov=pypots --cov-append  
+                  python -m pytest -rA pypots/tests/test_classification.py -n auto --cov=pypots  # first run starts fresh coverage data
+                  python -m pytest -rA pypots/tests/test_imputation.py -n auto --cov=pypots --cov-append  # append to the data above
+                  python -m pytest -rA pypots/tests/test_clustering.py -n auto --cov=pypots --cov-append  # append; do not erase earlier runs
 
             - name: Generate the LCOV report
               run: |

diff --git a/README.md b/README.md
@@ -56,18 +56,44 @@ Visit [TSDB](https://github.com/WenjieDu/TSDB) right now to know more about this
 Install the latest release from PyPI:
 > pip install pypots
 
+<details open>
+<summary><b>Below is an example applying SAITS in PyPOTS to impute missing values in the dataset PhysioNet2012:</b></summary>
+
+``` python
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from pypots.data import load_specific_dataset, mcar, masked_fill
+from pypots.imputation import SAITS
+from pypots.utils.metrics import cal_mae
+# Data preprocessing. Tedious, but PyPOTS can help. 🤓
+data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.
+X = data['X']
+num_samples = len(X['RecordID'].unique())
+X = X.drop('RecordID', axis = 1)
+X = StandardScaler().fit_transform(X.to_numpy())
+X = X.reshape(num_samples, 48, -1)
+X_intact, X, missing_mask, indicating_mask = mcar(X, 0.1) # hold out 10% observed values as ground truth
+X = masked_fill(X, 1 - missing_mask, np.nan)
+# Model training. This is PyPOTS showtime. 💪
+saits = SAITS(n_steps=48, n_features=37, n_layers=2, d_model=256, d_inner=128, n_head=4, d_k=64, d_v=64, dropout=0.1, epochs=10)
+saits.fit(X)  # train the model. Here I use the whole dataset as the training set, because ground truth is not visible to the model.
+imputation = saits.impute(X)  # impute the originally-missing values and artificially-missing values
+mae = cal_mae(imputation, X_intact, indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)
+```
+</details>
+
 ## ❖ Available Algorithms
-| Task                          | Type           | Algorithm                                                                 | Year | Reference |        
-|-------------------------------|----------------|---------------------------------------------------------------------------|------|-----------|
-| Imputation                    | Neural Network | SAITS: Self-Attention-based Imputation for Time Series                    | 2022 | [^1]      |
-| Imputation                    | Neural Network | Transformer                                                               | 2017 | [^2] [^1] |
-| Imputation,<br>Classification | Neural Network | BRITS (Bidirectional Recurrent Imputation for Time Series)                | 2018 | [^3]      |
-| Imputation                    | Naive          | LOCF (Last Observation Carried Forward)                                   | -    | -         |
-| Classification                | Neural Network | GRU-D                                                                     | 2018 | [^4]      |
-| Classification                | Neural Network | Raindrop                                                                  | 2022 | [^5]      |
-| Clustering                    | Neural Network | CRLI (Clustering Representation Learning on Incomplete time-series data)  | 2021 | [^6]      |
-| Clustering                    | Neural Network | VaDER (Variational Deep Embedding with Recurrence)                        | 2019 | [^7]      |
-| Forecasting                   | Probabilistic  | BTTF (Bayesian Temporal Tensor Factorization)                             | 2021 | [^8]      |
+| Task                          | Type           | Algorithm                                                                | Year | Reference |        
+|-------------------------------|----------------|--------------------------------------------------------------------------|------|-----------|
+| Imputation                    | Neural Network | SAITS (Self-Attention-based Imputation for Time Series)                  | 2022 | [^1]      |
+| Imputation                    | Neural Network | Transformer                                                              | 2017 | [^2] [^1] |
+| Imputation,<br>Classification | Neural Network | BRITS (Bidirectional Recurrent Imputation for Time Series)               | 2018 | [^3]      |
+| Imputation                    | Naive          | LOCF (Last Observation Carried Forward)                                  | -    | -         |
+| Classification                | Neural Network | GRU-D                                                                    | 2018 | [^4]      |
+| Classification                | Neural Network | Raindrop                                                                 | 2022 | [^5]      |
+| Clustering                    | Neural Network | CRLI (Clustering Representation Learning on Incomplete time-series data) | 2021 | [^6]      |
+| Clustering                    | Neural Network | VaDER (Variational Deep Embedding with Recurrence)                       | 2019 | [^7]      |
+| Forecasting                   | Probabilistic  | BTTF (Bayesian Temporal Tensor Factorization)                            | 2021 | [^8]      |
 
 ## ❖ Reference
 If you find PyPOTS is helpful to your research, please cite it as below and ⭐️star this repository to make others notice this work. 🤗
@@ -89,7 +115,7 @@ or
 ## ❖ Attention 👀
 The documentation and tutorials are under construction. And a short paper introducing PyPOTS is on the way! 🚀 Stay tuned please!
 
-‼️ PyPOTS is currently under developing. If you like it and look forward to its growth, <ins>please give PyPOTS a star and watch it to keep you posted on its progress and to let me know that its development is meaningful</ins>. If you have any feedback, or want to contribute ideas/suggestions or share time-series related algorithms/papers, please join PyPOTS community and chat on <a alt='Slack Workspace' href='https://join.slack.com/t/pypots-dev/shared_invite/zt-1gq6ufwsi-p0OZdW~e9UW_IA4_f1OfxA'><img align='center' src='https://img.shields.io/badge/Slack-PyPOTS-grey?logo=slack&labelColor=4A154B&color=62BCE5'></a>, or create an issue. If you have any additional questions or have interests in collaboration, please take a look at [my GitHub profile](https://github.com/WenjieDu) and feel free to contact me 😃.
+‼️ PyPOTS is currently under development. If you like it and look forward to its growth, <ins>please give PyPOTS a star and watch it to keep you posted on its progress and to let me know that its development is meaningful</ins>. If you have any feedback, or want to contribute ideas/suggestions or share time-series related algorithms/papers, please join the PyPOTS community and chat on <a alt='Slack Workspace' href='https://join.slack.com/t/pypots-dev/shared_invite/zt-1gq6ufwsi-p0OZdW~e9UW_IA4_f1OfxA'><img align='center' src='https://img.shields.io/badge/Slack-PyPOTS-grey?logo=slack&labelColor=4A154B&color=62BCE5'></a>, or create an issue. If you have any additional questions or are interested in collaboration, please take a look at [my GitHub profile](https://github.com/WenjieDu) and feel free to contact me 🤝.
 
 Thank you all for your attention! 😃
 

diff --git a/environment.yml b/environment.yml
@@ -1,7 +1,6 @@
 name: pypots
 channels:
     - pytorch
-    - pyg
     - conda-forge
     - nodefaults
 dependencies:
@@ -14,8 +13,6 @@ dependencies:
     - conda-forge::tensorboard
     - conda-forge::pip
     - pytorch::pytorch==1.11.0
-    - pyg::pyg==2.0.4
     - pip:
-        - torchdiffeq
         - pycorruptor==0.0.4
         - tsdb==0.0.7
diff --git a/pypots/__version__.py b/pypots/__version__.py
@@ -21,4 +21,4 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 
-version = "0.0.8"
+version = "0.0.9"
diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py
@@ -26,12 +26,20 @@
 from torch.nn import init
 from torch.nn.parameter import Parameter
 from torch.utils.data import DataLoader
-from torch_geometric.nn.conv import MessagePassing
-from torch_geometric.nn.inits import glorot
-from torch_geometric.typing import PairTensor, Adj, OptTensor
-from torch_geometric.utils import softmax
-from torch_scatter import scatter
-from torch_sparse import SparseTensor
+
+try:
+    from torch_geometric.nn.conv import MessagePassing
+    from torch_geometric.nn.inits import glorot
+    from torch_geometric.typing import PairTensor, Adj, OptTensor
+    from torch_geometric.utils import softmax
+    from torch_scatter import scatter
+    from torch_sparse import SparseTensor
+except ImportError as e:
+    print(
+        f"{e}\n"
+        "torch_geometric is missing, "
+        "please install it with 'pip install torch_geometric' or 'conda install -c pyg pyg'"
+    )
 
 from pypots.classification.base import BaseNNClassifier
 from pypots.data.dataset_for_grud import DatasetForGRUD

diff --git a/pypots/tests/environment_test.yml b/pypots/tests/environment_test.yml
@@ -19,6 +19,5 @@ dependencies:
     - pytorch::pytorch==1.11.0
     - pyg::pyg==2.0.4
     - pip:
-        - torchdiffeq
         - pycorruptor==0.0.4
         - tsdb==0.0.7
diff --git a/requirements.txt b/requirements.txt
@@ -2,11 +2,8 @@ matplotlib
 numpy
 scikit_learn
 scipy
-torch >= 1.10
-torch_sparse == 0.6.13
-torch_scatter
-torch_geometric
+torch == 1.11.0
 tensorboard
 pandas
 pycorruptor
-tsdb
+tsdb
diff --git a/setup.py b/setup.py
@@ -21,8 +21,10 @@
         "neural networks",
         "machine learning",
         "deep learning",
+        "time-series analysis",
         "partially observed",
-        "time series",
+        "irregular sampled",
+        "incomplete time series",
         "missing data",
         "missing values",
     ],
@@ -34,9 +36,9 @@
         "scikit_learn",
         "scipy",
         "torch>=1.10",  # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10
-        "torch_sparse==0.6.13",
-        "torch_scatter",
-        "torch_geometric",
+        # "torch_sparse==0.6.13",
+        # "torch_scatter",
+        # "torch_geometric",
         "tensorboard",
         "pandas",
         "pycorruptor",