diff --git a/README.md b/README.md
index 7559a304..686b3042 100644
--- a/README.md
+++ b/README.md
@@ -5,12 +5,16 @@
-
+
-
+
-
+
+
+
+
+
@@ -26,7 +30,7 @@
-
+
@@ -53,6 +57,10 @@ Visit [TSDB](https://github.com/WenjieDu/TSDB) right now to know more about this
## ❖ Installation
+PyPOTS is now available on conda-forge ❗️
+
+Install it with `conda install pypots`; you may need to specify the channel with the option `-c conda-forge`
+
Install the latest release from PyPI:
> pip install pypots
@@ -121,16 +129,16 @@ Thank you all for your attention! 😃
[^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert systems with applications*.
-[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS* 2017.
-[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS* 2018.
+[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS 2017*.
+[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS 2018*.
[^4]: Che, Z., Purushotham, S., Cho, K., Sontag, D.A., & Liu, Y. (2018). [Recurrent Neural Networks for Multivariate Time Series with Missing Values](https://www.nature.com/articles/s41598-018-24271-9). *Scientific Reports*.
-[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR* 2022.
-[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI* 2021.
+[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR 2022*.
+[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI 2021*.
[^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
[^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE transactions on pattern analysis and machine intelligence*.
🏠 Visits
-
+
diff --git a/docs/index.rst b/docs/index.rst
index 07a62304..435125bf 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -97,8 +97,8 @@ Below is an example applying SAITS in PyPOTS to impute missing values in the dat
============================== ================ ========================================================================= ====== =========
Task Type Algorithm Year Reference
============================== ================ ========================================================================= ====== =========
-Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2022SAITS`
-Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2022SAITS`
+Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2023SAITS`
+Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2023SAITS`
Imputation, Classification Neural Network BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS`
Imputation Naive LOCF (Last Observation Carried Forward) / /
Classification Neural Network GRU-D 2018 :cite:`che2018GRUD`
diff --git a/docs/references.bib b/docs/references.bib
index 9a06b474..a0735aa6 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -1,403 +1,400 @@
@article{cao2018BRITS,
- title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}},
- author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan},
- year = {2018},
- month = may,
- journal = {arXiv:1805.10572 [cs, stat]},
- eprint = {1805.10572},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1805.10572},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}},
+author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan},
+year = {2018},
+month = may,
+journal = {arXiv:1805.10572 [cs, stat]},
+eprint = {1805.10572},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1805.10572},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{che2018GRUD,
- title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}},
- author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
- year = {2018},
- month = apr,
- journal = {Scientific Reports},
- volume = {8},
- number = {1},
- pages = {6085},
- publisher = {{Nature Publishing Group}},
- issn = {2045-2322},
- doi = {10.1038/s41598-018-24271-9},
- url = {https://www.nature.com/articles/s41598-018-24271-9},
- copyright = {2018 The Author(s)}
+title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}},
+author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
+year = {2018},
+month = apr,
+journal = {Scientific Reports},
+volume = {8},
+number = {1},
+pages = {6085},
+publisher = {{Nature Publishing Group}},
+issn = {2045-2322},
+doi = {10.1038/s41598-018-24271-9},
+url = {https://www.nature.com/articles/s41598-018-24271-9},
+copyright = {2018 The Author(s)}
}
@article{chen2021BTMF,
- title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}},
- author = {Chen, Xinyu and Sun, Lijun},
- year = {2021},
- journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
- eprint = {1910.06366},
- eprinttype = {arxiv},
- pages = {1--1},
- issn = {0162-8828, 2160-9292, 1939-3539},
- doi = {10.1109/TPAMI.2021.3066551},
- url = {http://arxiv.org/abs/1910.06366},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}},
+author = {Chen, Xinyu and Sun, Lijun},
+year = {2021},
+journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+eprint = {1910.06366},
+eprinttype = {arxiv},
+pages = {1--1},
+issn = {0162-8828, 2160-9292, 1939-3539},
+doi = {10.1109/TPAMI.2021.3066551},
+url = {http://arxiv.org/abs/1910.06366},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{choi2020RDISRandom,
- title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}},
- author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan},
- year = {2020},
- month = oct,
- journal = {arXiv:2010.10075 [cs, stat]},
- eprint = {2010.10075},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/2010.10075},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}},
+author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan},
+year = {2020},
+month = oct,
+journal = {arXiv:2010.10075 [cs, stat]},
+eprint = {2010.10075},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/2010.10075},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{cini2021MultivariateTime,
- title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}},
- author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare},
- year = {2021},
- month = sep,
- journal = {arXiv:2108.00298 [cs]},
- eprint = {2108.00298},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2108.00298},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}},
+author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare},
+year = {2021},
+month = sep,
+journal = {arXiv:2108.00298 [cs]},
+eprint = {2108.00298},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2108.00298},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
@inproceedings{costa2018MissingData,
- title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}},
- booktitle = {Advances in {{Intelligent Data Analysis XVII}}},
- author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques},
- editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, Antti},
- year = {2018},
- series = {Lecture {{Notes}} in {{Computer Science}}},
- pages = {87--98},
- publisher = {{Springer International Publishing}},
- address = {{Cham}},
- doi = {10.1007/978-3-030-01768-2_8},
- isbn = {978-3-030-01768-2},
- keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
+title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}},
+booktitle = {Advances in {{Intelligent Data Analysis XVII}}},
+author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques},
+editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, Antti},
+year = {2018},
+series = {Lecture {{Notes}} in {{Computer Science}}},
+pages = {87--98},
+publisher = {{Springer International Publishing}},
+address = {{Cham}},
+doi = {10.1007/978-3-030-01768-2_8},
+isbn = {978-3-030-01768-2},
+keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
}
@article{dejong2019VaDER,
- title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
- author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
- year = {2019},
- month = nov,
- journal = {GigaScience},
- volume = {8},
- number = {11},
- pages = {giz134},
- issn = {2047-217X},
- doi = {10.1093/gigascience/giz134},
- url = {https://doi.org/10.1093/gigascience/giz134}
+title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
+author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
+year = {2019},
+month = nov,
+journal = {GigaScience},
+volume = {8},
+number = {11},
+pages = {giz134},
+issn = {2047-217X},
+doi = {10.1093/gigascience/giz134},
+url = {https://doi.org/10.1093/gigascience/giz134}
}
-@article{du2022SAITS,
- title = {{{SAITS}}: {{Self-Attention-based Imputation}} for {{Time Series}}},
- author = {Du, Wenjie and C{\^o}t{\'e}, David and Liu, Yan},
- year = {2022},
- month = feb,
- journal = {arXiv:2202.08516 [cs]},
- eprint = {2202.08516},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2202.08516},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning}
+@article{du2023SAITS,
+title = {{SAITS: Self-Attention-based Imputation for Time Series}},
+journal = {Expert Systems with Applications},
+volume = {219},
+pages = {119619},
+year = {2023},
+issn = {0957-4174},
+doi = {https://doi.org/10.1016/j.eswa.2023.119619},
+url = {https://www.sciencedirect.com/science/article/pii/S0957417423001203},
+author = {Wenjie Du and David C{\^o}t{\'e} and Yan Liu},
}
-
@article{fortuin2020GPVAEDeep,
- title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
- author = {Fortuin, Vincent and Baranchuk, Dmitry and R{\"a}tsch, Gunnar and Mandt, Stephan},
- year = {2020},
- month = feb,
- journal = {arXiv:1907.04155 [cs, stat]},
- eprint = {1907.04155},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1907.04155},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
+author = {Fortuin, Vincent and Baranchuk, Dmitry and R{\"a}tsch, Gunnar and Mandt, Stephan},
+year = {2020},
+month = feb,
+journal = {arXiv:1907.04155 [cs, stat]},
+eprint = {1907.04155},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1907.04155},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{horn2019SeFT,
- title = {Set {{Functions}} for {{Time Series}}},
- author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten},
- year = {2019},
- month = sep,
- url = {https://arxiv.org/abs/1909.12064v3}
+title = {Set {{Functions}} for {{Time Series}}},
+author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten},
+year = {2019},
+month = sep,
+url = {https://arxiv.org/abs/1909.12064v3}
}
@article{hubert1985AdjustedRI,
- title = {Comparing Partitions},
- author = {Hubert, Lawrence and Arabie, Phipps},
- year = {1985},
- month = dec,
- journal = {Journal of Classification},
- volume = {2},
- number = {1},
- pages = {193--218},
- issn = {1432-1343},
- doi = {10.1007/BF01908075},
- url = {https://doi.org/10.1007/BF01908075},
- keywords = {Consensus indices,Measures of agreement,Measures of association}
+title = {Comparing Partitions},
+author = {Hubert, Lawrence and Arabie, Phipps},
+year = {1985},
+month = dec,
+journal = {Journal of Classification},
+volume = {2},
+number = {1},
+pages = {193--218},
+issn = {1432-1343},
+doi = {10.1007/BF01908075},
+url = {https://doi.org/10.1007/BF01908075},
+keywords = {Consensus indices,Measures of agreement,Measures of association}
}
@article{little1988TestMCAR,
- title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}},
- author = {Little, Roderick J. A.},
- year = {1988},
- journal = {Journal of the American Statistical Association},
- volume = {83},
- number = {404},
- pages = {1198--1202},
- publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
- issn = {0162-1459},
- doi = {10.2307/2290157},
- url = {https://www.jstor.org/stable/2290157}
+title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}},
+author = {Little, Roderick J. A.},
+year = {1988},
+journal = {Journal of the American Statistical Association},
+volume = {83},
+number = {404},
+pages = {1198--1202},
+publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
+issn = {0162-1459},
+doi = {10.2307/2290157},
+url = {https://www.jstor.org/stable/2290157}
}
@inproceedings{liu2019NAOMI,
- title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}},
- booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]},
- author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong},
- year = {2019},
- month = oct,
- eprint = {1901.10946},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1901.10946},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}},
+booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]},
+author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong},
+year = {2019},
+month = oct,
+eprint = {1901.10946},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1901.10946},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@incollection{luo2018MultivariateTime,
- title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}},
- booktitle = {Advances in {{Neural Information Processing Systems}} 31},
- author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan},
- editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.},
- year = {2018},
- pages = {1596--1607},
- publisher = {{Curran Associates, Inc.}},
- url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf}
+title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}},
+booktitle = {Advances in {{Neural Information Processing Systems}} 31},
+author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan},
+editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.},
+year = {2018},
+pages = {1596--1607},
+publisher = {{Curran Associates, Inc.}},
+url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf}
}
@article{ma2019CDSA,
- title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}},
- author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu},
- year = {2019},
- month = aug,
- journal = {arXiv:1905.09904 [cs, stat]},
- eprint = {1905.09904},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1905.09904},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}},
+author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu},
+year = {2019},
+month = aug,
+journal = {arXiv:1905.09904 [cs, stat]},
+eprint = {1905.09904},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1905.09904},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{ma2021CRLI,
- title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}},
- author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.},
- year = {2021},
- month = may,
- journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
- volume = {35},
- number = {10},
- pages = {8837--8846},
- issn = {2374-3468},
- url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070},
- copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
- keywords = {Time-Series/Data Streams}
+title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}},
+author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.},
+year = {2021},
+month = may,
+journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
+volume = {35},
+number = {10},
+pages = {8837--8846},
+issn = {2374-3468},
+url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070},
+copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
+keywords = {Time-Series/Data Streams}
}
@article{miao2021SSGAN,
- title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}},
- author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei},
- year = {2021},
- month = may,
- journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
- volume = {35},
- number = {10},
- pages = {8983--8991},
- issn = {2374-3468},
- url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086},
- copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
- keywords = {Time-Series/Data Streams}
+title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}},
+author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei},
+year = {2021},
+month = may,
+journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
+volume = {35},
+number = {10},
+pages = {8983--8991},
+issn = {2374-3468},
+url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086},
+copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
+keywords = {Time-Series/Data Streams}
}
@article{mikalsen2017TimeSeries,
- title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}},
- author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert},
- year = {2017},
- month = jun,
- journal = {arXiv:1704.00794 [cs, stat]},
- eprint = {1704.00794},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1704.00794},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}},
+author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert},
+year = {2017},
+month = jun,
+journal = {arXiv:1704.00794 [cs, stat]},
+eprint = {1704.00794},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1704.00794},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@inproceedings{oh2021STINGSelfattention,
- title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}},
- booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})},
- author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil},
- year = {2021},
- month = dec,
- pages = {1264--1269},
- issn = {2374-8486},
- doi = {10.1109/ICDM51629.2021.00155},
- keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation}
+title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}},
+booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})},
+author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil},
+year = {2021},
+month = dec,
+pages = {1264--1269},
+issn = {2374-8486},
+doi = {10.1109/ICDM51629.2021.00155},
+keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation}
}
@article{oyvindmikalsen2021TimeSeries,
- title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information},
- author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert},
- year = {2021},
- month = jul,
- journal = {Pattern Recognition},
- volume = {115},
- pages = {107896},
- issn = {0031-3203},
- doi = {10.1016/j.patcog.2021.107896},
- url = {https://www.sciencedirect.com/science/article/pii/S0031320321000832},
- keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning}
+title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information},
+author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert},
+year = {2021},
+month = jul,
+journal = {Pattern Recognition},
+volume = {115},
+pages = {107896},
+issn = {0031-3203},
+doi = {10.1016/j.patcog.2021.107896},
+url = {https://www.sciencedirect.com/science/article/pii/S0031320321000832},
+keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning}
}
@article{rand1971RandIndex,
- title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}},
- author = {Rand, William M.},
- year = {1971},
- journal = {Journal of the American Statistical Association},
- volume = {66},
- number = {336},
- pages = {846--850},
- publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
- issn = {0162-1459},
- doi = {10.2307/2284239},
- url = {https://www.jstor.org/stable/2284239}
+title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}},
+author = {Rand, William M.},
+year = {1971},
+journal = {Journal of the American Statistical Association},
+volume = {66},
+number = {336},
+pages = {846--850},
+publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
+issn = {0162-1459},
+doi = {10.2307/2284239},
+url = {https://www.jstor.org/stable/2284239}
}
@article{shukla2021MultiTimeAttention,
- title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}},
- author = {Shukla, Satya Narayan and Marlin, Benjamin M.},
- year = {2021},
- month = jun,
- journal = {arXiv:2101.10318 [cs]},
- eprint = {2101.10318},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2101.10318},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}},
+author = {Shukla, Satya Narayan and Marlin, Benjamin M.},
+year = {2021},
+month = jun,
+journal = {arXiv:2101.10318 [cs]},
+eprint = {2101.10318},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2101.10318},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
@inproceedings{suo2020GLIMAGlobal,
- title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}},
- booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
- author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong},
- year = {2020},
- month = dec,
- pages = {798--807},
- doi = {10.1109/BigData50022.2020.9378408},
- keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis}
+title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}},
+booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
+author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong},
+year = {2020},
+month = dec,
+pages = {798--807},
+doi = {10.1109/BigData50022.2020.9378408},
+keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis}
}
@article{tang2019JointModeling,
- title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}},
- author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang},
- year = {2019},
- month = nov,
- journal = {arXiv:1911.10273 [cs, stat]},
- eprint = {1911.10273},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1911.10273},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}},
+author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang},
+year = {2019},
+month = nov,
+journal = {arXiv:1911.10273 [cs, stat]},
+eprint = {1911.10273},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1911.10273},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{tashiro2021CSDI,
- title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}},
- author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano},
- year = {2021},
- month = oct,
- journal = {arXiv:2107.03502 [cs, stat]},
- eprint = {2107.03502},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/2107.03502},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}},
+author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano},
+year = {2021},
+month = oct,
+journal = {arXiv:2107.03502 [cs, stat]},
+eprint = {2107.03502},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/2107.03502},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@inproceedings{vaswani2017Transformer,
- author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
- pages = {},
- publisher = {Curran Associates, Inc.},
- title = {Attention is All you Need},
- url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
- volume = {30},
- year = {2017}
+author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
+booktitle = {Advances in Neural Information Processing Systems},
+editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
+pages = {},
+publisher = {Curran Associates, Inc.},
+title = {Attention is All you Need},
+url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
+volume = {30},
+year = {2017}
}
@article{wu2015TimeSeries,
- title = {Time {{Series Forecasting}} with {{Missing Values}}},
- author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue},
- year = {2015},
- month = apr,
- journal = {EAI Endorsed Transactions on Cognitive Communications},
- volume = {"1"},
- number = {4},
- issn = {2313-4534},
- url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269}
+title = {Time {{Series Forecasting}} with {{Missing Values}}},
+author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue},
+year = {2015},
+month = apr,
+journal = {EAI Endorsed Transactions on Cognitive Communications},
+volume = {"1"},
+number = {4},
+issn = {2313-4534},
+url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269}
}
@article{yoon2017EstimatingMissing,
- title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}},
- author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela},
- year = {2017},
- month = nov,
- journal = {arXiv:1711.08742 [cs]},
- eprint = {1711.08742},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/1711.08742},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning}
+title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}},
+author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela},
+year = {2017},
+month = nov,
+journal = {arXiv:1711.08742 [cs]},
+eprint = {1711.08742},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/1711.08742},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning}
}
@article{yuan2019E2GAN,
- title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}},
- author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui},
- year = {2019},
- pages = {3094--3100},
- url = {https://www.ijcai.org/Proceedings/2019/429}
+title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}},
+author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui},
+year = {2019},
+pages = {3094--3100},
+url = {https://www.ijcai.org/Proceedings/2019/429}
}
@article{zhang2022Raindrop,
- title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}},
- author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka},
- year = {2022},
- month = mar,
- journal = {arXiv:2110.05357 [cs]},
- eprint = {2110.05357},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2110.05357},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}},
+author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka},
+year = {2022},
+month = mar,
+journal = {arXiv:2110.05357 [cs]},
+eprint = {2110.05357},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2110.05357},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
diff --git a/pypots/base.py b/pypots/base.py
index 106f981d..49b1b0c2 100644
--- a/pypots/base.py
+++ b/pypots/base.py
@@ -11,8 +11,8 @@
import numpy as np
import torch
-from pypots.utils.logging import logger
from pypots.utils.files import create_dir_if_not_exist
+from pypots.utils.logging import logger
class BaseModel(ABC):
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
index 54d40889..598902aa 100644
--- a/pypots/classification/base.py
+++ b/pypots/classification/base.py
@@ -77,7 +77,59 @@ def __init__(
self.n_classes = n_classes
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -94,7 +146,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -111,7 +163,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/classification/brits.py b/pypots/classification/brits.py
index f73dbcf5..5ef03860 100644
--- a/pypots/classification/brits.py
+++ b/pypots/classification/brits.py
@@ -219,7 +219,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -248,7 +248,11 @@ def assemble_input_data(self, data):
inputs = {
"indices": indices,
"label": label,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
"backward": {
"X": back_X,
"missing_mask": back_missing_mask,
@@ -257,6 +261,70 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ # fetch data
+ (
+ indices,
+ X,
+ missing_mask,
+ deltas,
+ back_X,
+ back_missing_mask,
+ back_deltas,
+ ) = data
+
+ # assemble input data
+ inputs = {
+ "indices": indices,
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
+ "backward": {
+ "X": back_X,
+ "deltas": back_deltas,
+ "missing_mask": back_missing_mask,
+ },
+ }
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -266,27 +334,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- (
- indices,
- X,
- missing_mask,
- deltas,
- back_X,
- back_missing_mask,
- back_deltas,
- ) = data
- # assemble input data
- inputs = {
- "indices": indices,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
- "backward": {
- "X": back_X,
- "missing_mask": back_missing_mask,
- "deltas": back_deltas,
- },
- }
-
+ inputs = self.assemble_input_for_testing(data)
results, _, _ = self.model.classify(inputs)
prediction_collector.append(results["prediction"])
diff --git a/pypots/classification/grud.py b/pypots/classification/grud.py
index 7b313eb0..69929dcc 100644
--- a/pypots/classification/grud.py
+++ b/pypots/classification/grud.py
@@ -181,7 +181,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -209,6 +209,56 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
+
+ inputs = {
+ "indices": indices,
+ "X": X,
+ "X_filledLOCF": X_filledLOCF,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ "empirical_mean": empirical_mean,
+ }
+
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -218,18 +268,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
- # assemble input data
- inputs = {
- "indices": indices,
- "X": X,
- "X_filledLOCF": X_filledLOCF,
- "missing_mask": missing_mask,
- "deltas": deltas,
- "empirical_mean": empirical_mean,
- }
-
+ inputs = self.assemble_input_for_testing(data)
prediction = self.model.classify(inputs)
prediction_collector.append(prediction)
diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py
index d63f0560..c6204bc5 100644
--- a/pypots/classification/raindrop.py
+++ b/pypots/classification/raindrop.py
@@ -702,7 +702,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -736,6 +736,58 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
+ bz, n_steps, n_features = X.shape
+ lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
+ times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1)
+
+ X = X.permute(1, 0, 2)
+ missing_mask = missing_mask.permute(1, 0, 2)
+ times = times.permute(1, 0)
+
+ inputs = {
+ "X": X,
+ "static": None,
+ "timestamps": times,
+ "lengths": lengths,
+ "missing_mask": missing_mask,
+ }
+
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -745,26 +797,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
- # assemble input data
-
- bz, n_steps, n_features = X.shape
- lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
- times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1)
-
- X = X.permute(1, 0, 2)
- missing_mask = missing_mask.permute(1, 0, 2)
- times = times.permute(1, 0)
-
- inputs = {
- "X": X,
- "static": None,
- "timestamps": times,
- "lengths": lengths,
- "missing_mask": missing_mask,
- }
-
+ inputs = self.assemble_input_for_testing(data)
prediction = self.model.classify(inputs)
prediction_collector.append(prediction)
diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py
index 30f69f49..f3cc8c2e 100644
--- a/pypots/clustering/base.py
+++ b/pypots/clustering/base.py
@@ -71,7 +71,59 @@ def __init__(
self.n_clusters = n_clusters
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -88,7 +140,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -105,7 +157,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/clustering/crli.py b/pypots/clustering/crli.py
index f6305a41..b0bd9723 100644
--- a/pypots/clustering/crli.py
+++ b/pypots/clustering/crli.py
@@ -363,19 +363,20 @@ def fit(self, train_X):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, _, missing_mask, _, _ = data
@@ -383,8 +384,48 @@ def assemble_input_data(self, data):
"X": X,
"missing_mask": missing_mask,
}
+
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def _train_model(self, training_loader, val_loader=None):
self.G_optimizer = torch.optim.Adam(
[
@@ -410,7 +451,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_train_loss_G_collector = []
epoch_train_loss_D_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
for _ in range(self.D_steps):
self.D_optimizer.zero_grad()
@@ -483,7 +524,7 @@ def cluster(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
inputs = self.model.cluster(inputs)
latent_collector.append(inputs["fcn_latent"])
diff --git a/pypots/clustering/vader.py b/pypots/clustering/vader.py
index 7922daac..14f682fe 100644
--- a/pypots/clustering/vader.py
+++ b/pypots/clustering/vader.py
@@ -389,19 +389,20 @@ def fit(self, train_X):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, _, missing_mask, _, _ = data
@@ -409,8 +410,48 @@ def assemble_input_data(self, data):
"X": X,
"missing_mask": missing_mask,
}
+
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def _train_model(self, training_loader, val_loader=None):
self.optimizer = torch.optim.Adam(
self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay
@@ -424,7 +465,7 @@ def _train_model(self, training_loader, val_loader=None):
for epoch in range(self.pretrain_epochs):
self.model.train()
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs, pretrain=True)
results["loss"].backward()
@@ -433,7 +474,7 @@ def _train_model(self, training_loader, val_loader=None):
sample_collector = []
for _ in range(10): # sampling 10 times
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs, pretrain=True)
sample_collector.append(results["z"])
samples = torch.cat(sample_collector).cpu().detach().numpy()
@@ -456,7 +497,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -473,7 +514,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
@@ -525,7 +566,7 @@ def cluster(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
results = self.model.cluster(inputs)
clustering_results_collector.append(results)
diff --git a/pypots/data/dataset_for_brits.py b/pypots/data/dataset_for_brits.py
index 0f3ee6a7..087bdba8 100644
--- a/pypots/data/dataset_for_brits.py
+++ b/pypots/data/dataset_for_brits.py
@@ -22,7 +22,7 @@ def parse_delta(missing_mask):
-------
delta, array,
Delta matrix indicates time gaps of missing values.
- Its math definition please refer to :cite:`che2018MissingData`.
+ Its math definition please refer to :cite:`che2018GRUD`.
"""
# missing_mask is from X, and X's shape and type had been checked. So no need to double-check here.
n_samples, n_steps, n_features = missing_mask.shape
diff --git a/pypots/data/dataset_for_mit.py b/pypots/data/dataset_for_mit.py
index b24e3f75..0edd8a88 100644
--- a/pypots/data/dataset_for_mit.py
+++ b/pypots/data/dataset_for_mit.py
@@ -14,7 +14,7 @@
class DatasetForMIT(BaseDataset):
"""Dataset for models that need MIT (masked imputation task) in their training, such as SAITS.
- For more information about MIT, please refer to :cite:`du2022SAITS`.
+ For more information about MIT, please refer to :cite:`du2023SAITS`.
Parameters
----------
diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py
index 64beadde..282b0336 100644
--- a/pypots/forecasting/base.py
+++ b/pypots/forecasting/base.py
@@ -14,6 +14,7 @@
from pypots.base import BaseModel, BaseNNModel
from pypots.utils.logging import logger
+
class BaseForecaster(BaseModel):
"""Abstract class for all forecasting models."""
@@ -62,7 +63,59 @@ def __init__(
)
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -79,7 +132,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -96,7 +149,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py
index a7290e14..e62ae50c 100644
--- a/pypots/imputation/base.py
+++ b/pypots/imputation/base.py
@@ -12,8 +12,8 @@
import torch
from pypots.base import BaseModel, BaseNNModel
-from pypots.utils.metrics import cal_mae
from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
try:
import nni
@@ -71,7 +71,59 @@ def __init__(
)
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(
@@ -94,7 +146,7 @@ def _train_model(
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -111,7 +163,7 @@ def _train_model(
imputation_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
imputation_collector.append(results["imputed_data"])
diff --git a/pypots/imputation/brits.py b/pypots/imputation/brits.py
index 46587d81..d15c8e33 100644
--- a/pypots/imputation/brits.py
+++ b/pypots/imputation/brits.py
@@ -537,25 +537,31 @@ def fit(self, train_X, val_X=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data
+
# assemble input data
inputs = {
"indices": indices,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
"backward": {
"X": back_X,
"missing_mask": back_missing_mask,
@@ -565,6 +571,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -574,7 +619,7 @@ def impute(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
imputed_data = self.model.impute(inputs)
imputation_collector.append(imputed_data)
diff --git a/pypots/imputation/saits.py b/pypots/imputation/saits.py
index 3badbbbe..d32bd0ab 100644
--- a/pypots/imputation/saits.py
+++ b/pypots/imputation/saits.py
@@ -239,19 +239,20 @@ def fit(self, train_X, val_X=None):
self.model.load_state_dict(self.best_model_dict)
self.model.eval() # set the model as eval status to freeze it.
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
"""
+
indices, X_intact, X, missing_mask, indicating_mask = data
inputs = {
@@ -263,6 +264,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The process of assembling the validating data is the same as assembling the training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The process of assembling the testing data is the same as assembling the training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
diff --git a/pypots/imputation/transformer.py b/pypots/imputation/transformer.py
index 8146a266..c84c30b1 100644
--- a/pypots/imputation/transformer.py
+++ b/pypots/imputation/transformer.py
@@ -330,18 +330,18 @@ def fit(self, train_X, val_X=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
"""
indices, X_intact, X, missing_mask, indicating_mask = data
@@ -355,6 +355,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The process of assembling the validating data is the same as assembling the training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The process of assembling the testing data is the same as assembling the training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
diff --git a/requirements.txt b/requirements.txt
index e22d68a0..59de6847 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
-matplotlib
numpy >= 1.23.3
-scikit_learn >= 0.24.1
+scikit-learn >= 0.24.1
torch == 1.11.0
scipy
tensorboard
diff --git a/setup.py b/setup.py
index f0ba6587..ba9febff 100644
--- a/setup.py
+++ b/setup.py
@@ -31,9 +31,8 @@
packages=find_packages(exclude=["tests"]),
include_package_data=True,
install_requires=[
- "matplotlib",
- "numpy",
- "scikit_learn",
+ "numpy>=1.23.3",
+ "scikit-learn>=0.24.1",
"scipy",
"torch>=1.10", # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10
# "torch_sparse==0.6.13",