From 831e9d47a20858c3e774987394487e7aac1d6dbf Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Fri, 24 Feb 2023 19:53:18 +0800 Subject: [PATCH 1/4] doc: fix the reference ; --- docs/index.rst | 4 +- docs/references.bib | 627 +++++++++++++++---------------- pypots/data/dataset_for_brits.py | 2 +- pypots/data/dataset_for_mit.py | 2 +- 4 files changed, 316 insertions(+), 319 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 07a62304..435125bf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -97,8 +97,8 @@ Below is an example applying SAITS in PyPOTS to impute missing values in the dat ============================== ================ ========================================================================= ====== ========= Task Type Algorithm Year Reference ============================== ================ ========================================================================= ====== ========= -Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2022SAITS` -Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2022SAITS` +Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2023SAITS` +Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2023SAITS` Imputation, Classification Neural Network BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS` Imputation Naive LOCF (Last Observation Carried Forward) / / Classification Neural Network GRU-D 2018 :cite:`che2018GRUD` diff --git a/docs/references.bib b/docs/references.bib index 9a06b474..a0735aa6 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -1,403 +1,400 @@ @article{cao2018BRITS, - title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}}, - author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan}, - year = {2018}, - month = may, - journal = {arXiv:1805.10572 [cs, stat]}, - eprint = {1805.10572}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1805.10572}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}}, +author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan}, +year = {2018}, +month = may, +journal = {arXiv:1805.10572 [cs, stat]}, +eprint = {1805.10572}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1805.10572}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{che2018GRUD, - title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}}, - author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan}, - year = {2018}, - month = apr, - journal = {Scientific Reports}, - volume = {8}, - number = {1}, - pages = {6085}, - publisher = {{Nature Publishing Group}}, - issn = {2045-2322}, - doi = {10.1038/s41598-018-24271-9}, - url = {https://www.nature.com/articles/s41598-018-24271-9}, - copyright = {2018 The Author(s)} +title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}}, +author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan}, +year = {2018}, +month = apr, +journal = {Scientific Reports}, +volume = {8}, +number = {1}, +pages = {6085}, 
+publisher = {{Nature Publishing Group}}, +issn = {2045-2322}, +doi = {10.1038/s41598-018-24271-9}, +url = {https://www.nature.com/articles/s41598-018-24271-9}, +copyright = {2018 The Author(s)} } @article{chen2021BTMF, - title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}}, - author = {Chen, Xinyu and Sun, Lijun}, - year = {2021}, - journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, - eprint = {1910.06366}, - eprinttype = {arxiv}, - pages = {1--1}, - issn = {0162-8828, 2160-9292, 1939-3539}, - doi = {10.1109/TPAMI.2021.3066551}, - url = {http://arxiv.org/abs/1910.06366}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}}, +author = {Chen, Xinyu and Sun, Lijun}, +year = {2021}, +journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, +eprint = {1910.06366}, +eprinttype = {arxiv}, +pages = {1--1}, +issn = {0162-8828, 2160-9292, 1939-3539}, +doi = {10.1109/TPAMI.2021.3066551}, +url = {http://arxiv.org/abs/1910.06366}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{choi2020RDISRandom, - title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}}, - author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan}, - year = {2020}, - month = oct, - journal = {arXiv:2010.10075 [cs, stat]}, - eprint = {2010.10075}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/2010.10075}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}}, +author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan}, +year = {2020}, +month = oct, +journal = {arXiv:2010.10075 [cs, stat]}, +eprint = {2010.10075}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/2010.10075}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{cini2021MultivariateTime, - title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}}, - author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare}, - year = {2021}, - month = sep, - journal = {arXiv:2108.00298 [cs]}, - eprint = {2108.00298}, - eprinttype = {arxiv}, - primaryclass = {cs}, - url = {http://arxiv.org/abs/2108.00298}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} +title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}}, +author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare}, +year = {2021}, +month = sep, +journal = {arXiv:2108.00298 [cs]}, +eprint = {2108.00298}, +eprinttype = {arxiv}, +primaryclass = {cs}, +url = {http://arxiv.org/abs/2108.00298}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} } @inproceedings{costa2018MissingData, - title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}}, - booktitle = {Advances in {{Intelligent Data Analysis XVII}}}, - author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques}, - editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, 
Antti},
-  year = {2018},
-  series = {Lecture {{Notes}} in {{Computer Science}}},
-  pages = {87--98},
-  publisher = {{Springer International Publishing}},
-  address = {{Cham}},
-  doi = {10.1007/978-3-030-01768-2_8},
-  isbn = {978-3-030-01768-2},
-  keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
+title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}},
+booktitle = {Advances in {{Intelligent Data Analysis XVII}}},
+author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques},
+editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, Antti},
+year = {2018},
+series = {Lecture {{Notes}} in {{Computer Science}}},
+pages = {87--98},
+publisher = {{Springer International Publishing}},
+address = {{Cham}},
+doi = {10.1007/978-3-030-01768-2_8},
+isbn = {978-3-030-01768-2},
+keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
 }
 
 @article{dejong2019VaDER,
-  title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
-  author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
-  year = {2019},
-  month = nov,
-  journal = {GigaScience},
-  volume = {8},
-  number = {11},
-  pages = {giz134},
-  issn = {2047-217X},
-  doi = {10.1093/gigascience/giz134},
-  url = {https://doi.org/10.1093/gigascience/giz134}
+title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
+author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
+year = {2019},
+month = nov,
+journal = {GigaScience},
+volume = {8},
+number = {11},
+pages = {giz134},
+issn = {2047-217X},
+doi = {10.1093/gigascience/giz134},
+url = {https://doi.org/10.1093/gigascience/giz134}
 }
 
-@article{du2022SAITS,
-  title = {{{SAITS}}: {{Self-Attention-based Imputation}} for {{Time Series}}},
-  author = {Du, Wenjie and C{\^o}t{\'e}, David and Liu, Yan},
-  year = {2022},
-  month = feb,
-  journal = {arXiv:2202.08516 [cs]},
-  eprint = {2202.08516},
-  eprinttype = {arxiv},
-  primaryclass = {cs},
-  url = {http://arxiv.org/abs/2202.08516},
-  archiveprefix = {arXiv},
-  keywords = {Computer Science - Machine Learning}
+@article{du2023SAITS,
+title = {{SAITS: Self-Attention-based Imputation for Time Series}},
+journal = {Expert Systems with Applications},
+volume = {219},
+pages = {119619},
+year = {2023},
+issn = {0957-4174},
+doi = {10.1016/j.eswa.2023.119619},
+url = {https://www.sciencedirect.com/science/article/pii/S0957417423001203},
+author = {Wenjie Du and David C{\^o}t{\'e} and Yan Liu},
 }
-
 @article{fortuin2020GPVAEDeep,
-  title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
-  author = {Fortuin, Vincent and Baranchuk, Dmitry and R{\"a}tsch, Gunnar and Mandt, Stephan},
-  year = {2020},
-  month = feb,
-  journal = {arXiv:1907.04155 [cs, stat]},
-  eprint = {1907.04155},
-  eprinttype = {arxiv},
-  primaryclass = {cs, stat},
-  url = {http://arxiv.org/abs/1907.04155},
-  archiveprefix = {arXiv},
-  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
+author = {Fortuin, Vincent and Baranchuk, Dmitry and 
R{\"a}tsch, Gunnar and Mandt, Stephan}, +year = {2020}, +month = feb, +journal = {arXiv:1907.04155 [cs, stat]}, +eprint = {1907.04155}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1907.04155}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{horn2019SeFT, - title = {Set {{Functions}} for {{Time Series}}}, - author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten}, - year = {2019}, - month = sep, - url = {https://arxiv.org/abs/1909.12064v3} +title = {Set {{Functions}} for {{Time Series}}}, +author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten}, +year = {2019}, +month = sep, +url = {https://arxiv.org/abs/1909.12064v3} } @article{hubert1985AdjustedRI, - title = {Comparing Partitions}, - author = {Hubert, Lawrence and Arabie, Phipps}, - year = {1985}, - month = dec, - journal = {Journal of Classification}, - volume = {2}, - number = {1}, - pages = {193--218}, - issn = {1432-1343}, - doi = {10.1007/BF01908075}, - url = {https://doi.org/10.1007/BF01908075}, - keywords = {Consensus indices,Measures of agreement,Measures of association} +title = {Comparing Partitions}, +author = {Hubert, Lawrence and Arabie, Phipps}, +year = {1985}, +month = dec, +journal = {Journal of Classification}, +volume = {2}, +number = {1}, +pages = {193--218}, +issn = {1432-1343}, +doi = {10.1007/BF01908075}, +url = {https://doi.org/10.1007/BF01908075}, +keywords = {Consensus indices,Measures of agreement,Measures of association} } @article{little1988TestMCAR, - title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}}, - author = {Little, Roderick J. A.}, - year = {1988}, - journal = {Journal of the American Statistical Association}, - volume = {83}, - number = {404}, - pages = {1198--1202}, - publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}}, - issn = {0162-1459}, - doi = {10.2307/2290157}, - url = {https://www.jstor.org/stable/2290157} +title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}}, +author = {Little, Roderick J. 
A.}, +year = {1988}, +journal = {Journal of the American Statistical Association}, +volume = {83}, +number = {404}, +pages = {1198--1202}, +publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}}, +issn = {0162-1459}, +doi = {10.2307/2290157}, +url = {https://www.jstor.org/stable/2290157} } @inproceedings{liu2019NAOMI, - title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}}, - booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]}, - author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong}, - year = {2019}, - month = oct, - eprint = {1901.10946}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1901.10946}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}}, +booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]}, +author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong}, +year = {2019}, +month = oct, +eprint = {1901.10946}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1901.10946}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @incollection{luo2018MultivariateTime, - title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}}, - booktitle = {Advances in {{Neural Information Processing Systems}} 31}, - author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan}, - editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.}, - year = {2018}, - pages = {1596--1607}, - publisher = {{Curran Associates, Inc.}}, - url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf} +title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}}, +booktitle = {Advances in {{Neural Information Processing Systems}} 31}, +author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan}, +editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. 
and Garnett, R.}, +year = {2018}, +pages = {1596--1607}, +publisher = {{Curran Associates, Inc.}}, +url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf} } @article{ma2019CDSA, - title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}}, - author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu}, - year = {2019}, - month = aug, - journal = {arXiv:1905.09904 [cs, stat]}, - eprint = {1905.09904}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1905.09904}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}}, +author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu}, +year = {2019}, +month = aug, +journal = {arXiv:1905.09904 [cs, stat]}, +eprint = {1905.09904}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1905.09904}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{ma2021CRLI, - title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}}, - author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.}, - year = {2021}, - month = may, - journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, - volume = {35}, - number = {10}, - pages = {8837--8846}, - issn = {2374-3468}, - url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070}, - copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence}, - keywords = {Time-Series/Data Streams} +title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}}, +author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.}, +year = {2021}, +month = may, +journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, +volume = {35}, +number = {10}, +pages = {8837--8846}, +issn = {2374-3468}, +url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070}, +copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence}, +keywords = {Time-Series/Data Streams} } @article{miao2021SSGAN, - title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}}, - author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei}, - year = {2021}, - month = may, - journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, - volume = {35}, - number = {10}, - pages = {8983--8991}, - issn = {2374-3468}, - url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086}, - copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence}, - keywords = {Time-Series/Data Streams} +title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}}, +author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei}, +year = {2021}, +month = may, +journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, +volume = {35}, +number = {10}, +pages = {8983--8991}, +issn = {2374-3468}, +url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086}, +copyright = {Copyright (c) 2021 Association for 
the Advancement of Artificial Intelligence}, +keywords = {Time-Series/Data Streams} } @article{mikalsen2017TimeSeries, - title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}}, - author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert}, - year = {2017}, - month = jun, - journal = {arXiv:1704.00794 [cs, stat]}, - eprint = {1704.00794}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1704.00794}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}}, +author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert}, +year = {2017}, +month = jun, +journal = {arXiv:1704.00794 [cs, stat]}, +eprint = {1704.00794}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1704.00794}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @inproceedings{oh2021STINGSelfattention, - title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}}, - booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})}, - author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil}, - year = {2021}, - month = dec, - pages = {1264--1269}, - issn = {2374-8486}, - doi = {10.1109/ICDM51629.2021.00155}, - keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation} +title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}}, +booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})}, +author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil}, +year = {2021}, +month = dec, +pages = {1264--1269}, +issn = {2374-8486}, +doi = {10.1109/ICDM51629.2021.00155}, +keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation} } @article{oyvindmikalsen2021TimeSeries, - title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information}, - author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert}, - year = {2021}, - month = jul, - journal = {Pattern Recognition}, - volume = {115}, - pages = {107896}, - issn = {0031-3203}, - doi = {10.1016/j.patcog.2021.107896}, - url = {https://www.sciencedirect.com/science/article/pii/S0031320321000832}, - keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning} +title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information}, +author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert}, +year = {2021}, +month = jul, +journal = {Pattern Recognition}, +volume = {115}, +pages = {107896}, +issn = {0031-3203}, +doi = {10.1016/j.patcog.2021.107896}, +url = 
{https://www.sciencedirect.com/science/article/pii/S0031320321000832}, +keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning} } @article{rand1971RandIndex, - title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}}, - author = {Rand, William M.}, - year = {1971}, - journal = {Journal of the American Statistical Association}, - volume = {66}, - number = {336}, - pages = {846--850}, - publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}}, - issn = {0162-1459}, - doi = {10.2307/2284239}, - url = {https://www.jstor.org/stable/2284239} +title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}}, +author = {Rand, William M.}, +year = {1971}, +journal = {Journal of the American Statistical Association}, +volume = {66}, +number = {336}, +pages = {846--850}, +publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}}, +issn = {0162-1459}, +doi = {10.2307/2284239}, +url = {https://www.jstor.org/stable/2284239} } @article{shukla2021MultiTimeAttention, - title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}}, - author = {Shukla, Satya Narayan and Marlin, Benjamin M.}, - year = {2021}, - month = jun, - journal = {arXiv:2101.10318 [cs]}, - eprint = {2101.10318}, - eprinttype = {arxiv}, - primaryclass = {cs}, - url = {http://arxiv.org/abs/2101.10318}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} +title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}}, +author = {Shukla, Satya Narayan and Marlin, Benjamin M.}, +year = {2021}, +month = jun, +journal = {arXiv:2101.10318 [cs]}, +eprint = {2101.10318}, +eprinttype = {arxiv}, +primaryclass = {cs}, +url = {http://arxiv.org/abs/2101.10318}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} } @inproceedings{suo2020GLIMAGlobal, - title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}}, - booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})}, - author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong}, - year = {2020}, - month = dec, - pages = {798--807}, - doi = {10.1109/BigData50022.2020.9378408}, - keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis} +title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}}, +booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})}, +author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong}, +year = {2020}, +month = dec, +pages = {798--807}, +doi = {10.1109/BigData50022.2020.9378408}, +keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis} } @article{tang2019JointModeling, - title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}}, - author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang}, - year = {2019}, - month = nov, - 
journal = {arXiv:1911.10273 [cs, stat]}, - eprint = {1911.10273}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1911.10273}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}}, +author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang}, +year = {2019}, +month = nov, +journal = {arXiv:1911.10273 [cs, stat]}, +eprint = {1911.10273}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/1911.10273}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @article{tashiro2021CSDI, - title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}}, - author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano}, - year = {2021}, - month = oct, - journal = {arXiv:2107.03502 [cs, stat]}, - eprint = {2107.03502}, - eprinttype = {arxiv}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/2107.03502}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} +title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}}, +author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano}, +year = {2021}, +month = oct, +journal = {arXiv:2107.03502 [cs, stat]}, +eprint = {2107.03502}, +eprinttype = {arxiv}, +primaryclass = {cs, stat}, +url = {http://arxiv.org/abs/2107.03502}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning,Statistics - Machine Learning} } @inproceedings{vaswani2017Transformer, - author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Attention is All you Need}, - url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf}, - volume = {30}, - year = {2017} +author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia}, +booktitle = {Advances in Neural Information Processing Systems}, +editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. 
Garnett}, +pages = {}, +publisher = {Curran Associates, Inc.}, +title = {Attention is All you Need}, +url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf}, +volume = {30}, +year = {2017} } @article{wu2015TimeSeries, - title = {Time {{Series Forecasting}} with {{Missing Values}}}, - author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue}, - year = {2015}, - month = apr, - journal = {EAI Endorsed Transactions on Cognitive Communications}, - volume = {"1"}, - number = {4}, - issn = {2313-4534}, - url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269} +title = {Time {{Series Forecasting}} with {{Missing Values}}}, +author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue}, +year = {2015}, +month = apr, +journal = {EAI Endorsed Transactions on Cognitive Communications}, +volume = {"1"}, +number = {4}, +issn = {2313-4534}, +url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269} } @article{yoon2017EstimatingMissing, - title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}}, - author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela}, - year = {2017}, - month = nov, - journal = {arXiv:1711.08742 [cs]}, - eprint = {1711.08742}, - eprinttype = {arxiv}, - primaryclass = {cs}, - url = {http://arxiv.org/abs/1711.08742}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Machine Learning} +title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}}, +author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela}, +year = {2017}, +month = nov, +journal = {arXiv:1711.08742 [cs]}, +eprint = {1711.08742}, +eprinttype = {arxiv}, +primaryclass = {cs}, +url = {http://arxiv.org/abs/1711.08742}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Machine Learning} } @article{yuan2019E2GAN, - title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}}, - author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui}, - year = {2019}, - pages = {3094--3100}, - url = {https://www.ijcai.org/Proceedings/2019/429} +title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}}, +author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui}, +year = {2019}, +pages = {3094--3100}, +url = {https://www.ijcai.org/Proceedings/2019/429} } @article{zhang2022Raindrop, - title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}}, - author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka}, - year = {2022}, - month = mar, - journal = {arXiv:2110.05357 [cs]}, - eprint = {2110.05357}, - eprinttype = {arxiv}, - primaryclass = {cs}, - url = {http://arxiv.org/abs/2110.05357}, - archiveprefix = {arXiv}, - keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} +title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}}, +author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka}, +year = {2022}, +month = mar, +journal = {arXiv:2110.05357 [cs]}, +eprint = {2110.05357}, +eprinttype = {arxiv}, +primaryclass = {cs}, +url = {http://arxiv.org/abs/2110.05357}, +archiveprefix = {arXiv}, +keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning} } diff --git 
a/pypots/data/dataset_for_brits.py b/pypots/data/dataset_for_brits.py index 0f3ee6a7..087bdba8 100644 --- a/pypots/data/dataset_for_brits.py +++ b/pypots/data/dataset_for_brits.py @@ -22,7 +22,7 @@ def parse_delta(missing_mask): ------- delta, array, Delta matrix indicates time gaps of missing values. - Its math definition please refer to :cite:`che2018MissingData`. + Its math definition please refer to :cite:`che2018GRUD`. """ # missing_mask is from X, and X's shape and type had been checked. So no need to double-check here. n_samples, n_steps, n_features = missing_mask.shape diff --git a/pypots/data/dataset_for_mit.py b/pypots/data/dataset_for_mit.py index b24e3f75..0edd8a88 100644 --- a/pypots/data/dataset_for_mit.py +++ b/pypots/data/dataset_for_mit.py @@ -14,7 +14,7 @@ class DatasetForMIT(BaseDataset): """Dataset for models that need MIT (masked imputation task) in their training, such as SAITS. - For more information about MIT, please refer to :cite:`du2022SAITS`. + For more information about MIT, please refer to :cite:`du2023SAITS`. Parameters ---------- From dc3c005f1770bffe294221d64dbcdf0019206c0b Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Thu, 9 Mar 2023 11:59:01 +0800 Subject: [PATCH 2/4] fix: update the dependencies; --- requirements.txt | 3 +-- setup.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index e22d68a0..59de6847 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -matplotlib numpy >= 1.23.3 -scikit_learn >= 0.24.1 +scikit-learn >= 0.24.1 torch == 1.11.0 scipy tensorboard diff --git a/setup.py b/setup.py index f0ba6587..ba9febff 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,8 @@ packages=find_packages(exclude=["tests"]), include_package_data=True, install_requires=[ - "matplotlib", - "numpy", - "scikit_learn", + "numpy>=1.23.3", + "scikit-learn>=0.24.1", "scipy", "torch>=1.10", # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10 # "torch_sparse==0.6.13", From e7b72bd6d75a491dda04437e3789ba19ac3848dc Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Tue, 28 Mar 2023 13:24:36 +0800 Subject: [PATCH 3/4] doc: update README to add pypots installation with conda; --- README.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7559a304..686b3042 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,16 @@

 [README badge block: the <a>/<img> badge markup was stripped during extraction; this hunk updates the existing "PyPI" badge and adds an "on anaconda" badge]
@@ -26,7 +30,7 @@
 [badge markup stripped during extraction]
@@ -53,6 +57,10 @@ Visit [TSDB](https://github.com/WenjieDu/TSDB) right now to know more about this
## ❖ Installation +PyPOTS now is available on
❗️
+
+Install it with `conda install pypots`; you may need to specify the channel with the option `-c conda-forge`.
+
 Install the latest release from PyPI:
 
 > pip install pypots
 
@@ -121,16 +129,16 @@ Thank you all for your attention! 😃
 
 [^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert systems with applications*.
-[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS* 2017.
-[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS* 2018.
+[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS 2017*.
+[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS 2018*.
 [^4]: Che, Z., Purushotham, S., Cho, K., Sontag, D.A., & Liu, Y. (2018). [Recurrent Neural Networks for Multivariate Time Series with Missing Values](https://www.nature.com/articles/s41598-018-24271-9). *Scientific Reports*.
-[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR* 2022.
-[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI* 2021.
+[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR 2022*.
+[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI 2021*.
 [^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
 [^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE transactions on pattern analysis and machine intelligence*.

🏠 Visits
 [visit-counter badge markup stripped during extraction; the hunk replaces one badge line]
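[Editor's note between patches: the sketch below mirrors the SAITS imputation example that the patched README and the index.rst table in PATCH 1/4 point to, as a quick smoke test of the `pip`/`conda` installs documented above. The array shapes, masking rate, and hyperparameter values are illustrative assumptions, not content of this patch series.]

```python
# A minimal sketch of the imputation workflow the patched docs describe.
# Shapes, masking rate, and hyperparameters are illustrative assumptions.
import numpy as np
from pypots.imputation import SAITS

n_samples, n_steps, n_features = 128, 48, 37
X = np.random.randn(n_samples, n_steps, n_features)
X[np.random.rand(*X.shape) < 0.1] = np.nan  # artificially mask ~10% of the values

saits = SAITS(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=256,
    d_inner=128,
    n_head=4,
    d_k=64,
    d_v=64,
    dropout=0.1,
    epochs=10,
)
saits.fit(X)                   # train on the partially observed data
imputation = saits.impute(X)   # fill in the missing values
```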
From 0611df1f93ea05e4640448fbdd7e268193d7be Mon Sep 17 00:00:00 2001
From: Wenjie Du
Date: Wed, 29 Mar 2023 17:55:27 +0800
Subject: [PATCH 4/4] feat: separate the input data assembling functions of
 training, validating, and testing stages;

---
 pypots/base.py                    |  2 +-
 pypots/classification/base.py     | 58 ++++++++++++++++++-
 pypots/classification/brits.py    | 94 +++++++++++++++++++++++--------
 pypots/classification/grud.py     | 65 ++++++++++++++++-----
 pypots/classification/raindrop.py | 75 +++++++++++++++++-------
 pypots/clustering/base.py         | 58 ++++++++++++++++++-
 pypots/clustering/crli.py         | 57 ++++++++++++++++---
 pypots/clustering/vader.py        | 63 +++++++++++++++++----
 pypots/forecasting/base.py        | 59 ++++++++++++++++++-
 pypots/imputation/base.py         | 60 ++++++++++++++++++--
 pypots/imputation/brits.py        | 61 +++++++++++++++++---
 pypots/imputation/saits.py        | 52 +++++++++++++++--
 pypots/imputation/transformer.py  | 51 +++++++++++++++--
 13 files changed, 645 insertions(+), 110 deletions(-)

diff --git a/pypots/base.py b/pypots/base.py
index 106f981d..49b1b0c2 100644
--- a/pypots/base.py
+++ b/pypots/base.py
@@ -11,8 +11,8 @@
 import numpy as np
 import torch
 
-from pypots.utils.logging import logger
 from pypots.utils.files import create_dir_if_not_exist
+from pypots.utils.logging import logger
 
 
 class BaseModel(ABC):
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
index 54d40889..598902aa 100644
--- a/pypots/classification/base.py
+++ b/pypots/classification/base.py
@@ -77,7 +77,59 @@ def __init__(
         self.n_classes = n_classes
 
     @abstractmethod
-    def assemble_input_data(self, data):
+    def assemble_input_for_training(self, data) -> dict:
+        """Assemble the given data into a dictionary for training input.
+
+        Parameters
+        ----------
+        data : list,
+            Input data from dataloader, should be list.
+
+        Returns
+        -------
+        dict,
+            A python dictionary contains the input data for model training.
+        """
+        pass
+
+    @abstractmethod
+    def assemble_input_for_validating(self, data) -> dict:
+        """Assemble the given data into a dictionary for validating input.
+
+        Parameters
+        ----------
+        data : list,
+            Data output from dataloader, should be list.
+
+        Returns
+        -------
+        dict,
+            A python dictionary contains the input data for model validating.
+        """
+        pass
+
+    @abstractmethod
+    def assemble_input_for_testing(self, data) -> dict:
+        """Assemble the given data into a dictionary for testing input.
+
+        Notes
+        -----
+        The processing functions of train/val/test stages are separated for the situation where the inputs of
+        the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+        used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+        classification task contain labels, but the testing data (from the production environment) generally
+        doesn't have labels.
+
+        Parameters
+        ----------
+        data : list,
+            Data output from dataloader, should be list.
+
+        Returns
+        -------
+        dict,
+            A python dictionary contains the input data for model testing.
+ """ pass def _train_model(self, training_loader, val_loader=None): @@ -94,7 +146,7 @@ def _train_model(self, training_loader, val_loader=None): self.model.train() epoch_train_loss_collector = [] for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_training(data) self.optimizer.zero_grad() results = self.model.forward(inputs) results["loss"].backward() @@ -111,7 +163,7 @@ def _train_model(self, training_loader, val_loader=None): epoch_val_loss_collector = [] with torch.no_grad(): for idx, data in enumerate(val_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_validating(data) results = self.model.forward(inputs) epoch_val_loss_collector.append(results["loss"].item()) diff --git a/pypots/classification/brits.py b/pypots/classification/brits.py index f73dbcf5..5ef03860 100644 --- a/pypots/classification/brits.py +++ b/pypots/classification/brits.py @@ -219,7 +219,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self.model.eval() # set the model as eval status to freeze it. return self - def assemble_input_data(self, data): + def assemble_input_for_training(self, data): """Assemble the input data into a dictionary. Parameters @@ -248,7 +248,11 @@ def assemble_input_data(self, data): inputs = { "indices": indices, "label": label, - "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, + "forward": { + "X": X, + "missing_mask": missing_mask, + "deltas": deltas, + }, "backward": { "X": back_X, "missing_mask": back_missing_mask, @@ -257,6 +261,70 @@ def assemble_input_data(self, data): } return inputs + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Notes + ----- + The validating data assembling processing is the same as training data assembling. + + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model validating. + """ + return self.assemble_input_for_training(data) + + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. + + Notes + ----- + The testing data assembling processing is the same as training data assembling. + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model testing. + """ + # fetch data + ( + indices, + X, + missing_mask, + deltas, + back_X, + back_missing_mask, + back_deltas, + ) = data + + # assemble input data + inputs = { + "indices": indices, + "forward": { + "X": X, + "missing_mask": missing_mask, + "deltas": deltas, + }, + "backward": { + "X": back_X, + "deltas": back_deltas, + "missing_mask": back_missing_mask, + }, + } + return inputs + def classify(self, X): X = self.check_input(self.n_steps, self.n_features, X) self.model.eval() # set the model as eval status to freeze it. 
@@ -266,27 +334,7 @@ def classify(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - # cannot use input_data_processing, cause here has no label - ( - indices, - X, - missing_mask, - deltas, - back_X, - back_missing_mask, - back_deltas, - ) = data - # assemble input data - inputs = { - "indices": indices, - "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, - "backward": { - "X": back_X, - "missing_mask": back_missing_mask, - "deltas": back_deltas, - }, - } - + inputs = self.assemble_input_for_testing(data) results, _, _ = self.model.classify(inputs) prediction_collector.append(results["prediction"]) diff --git a/pypots/classification/grud.py b/pypots/classification/grud.py index 7b313eb0..69929dcc 100644 --- a/pypots/classification/grud.py +++ b/pypots/classification/grud.py @@ -181,7 +181,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self.model.eval() # set the model as eval status to freeze it. return self - def assemble_input_data(self, data): + def assemble_input_for_training(self, data): """Assemble the input data into a dictionary. Parameters @@ -209,6 +209,56 @@ def assemble_input_data(self, data): } return inputs + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Notes + ----- + The validating data assembling processing is the same as training data assembling. + + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model validating. + """ + return self.assemble_input_for_training(data) + + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. + + Notes + ----- + The testing data assembling processing is the same as training data assembling. + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model testing. + """ + indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data + + inputs = { + "indices": indices, + "X": X, + "X_filledLOCF": X_filledLOCF, + "missing_mask": missing_mask, + "deltas": deltas, + "empirical_mean": empirical_mean, + } + + return inputs + def classify(self, X): X = self.check_input(self.n_steps, self.n_features, X) self.model.eval() # set the model as eval status to freeze it. @@ -218,18 +268,7 @@ def classify(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - # cannot use input_data_processing, cause here has no label - indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data - # assemble input data - inputs = { - "indices": indices, - "X": X, - "X_filledLOCF": X_filledLOCF, - "missing_mask": missing_mask, - "deltas": deltas, - "empirical_mean": empirical_mean, - } - + inputs = self.assemble_input_for_testing(data) prediction = self.model.classify(inputs) prediction_collector.append(prediction) diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py index d63f0560..c6204bc5 100644 --- a/pypots/classification/raindrop.py +++ b/pypots/classification/raindrop.py @@ -702,7 +702,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self.model.eval() # set the model as eval status to freeze it. 
return self - def assemble_input_data(self, data): + def assemble_input_for_training(self, data): """Assemble the input data into a dictionary. Parameters @@ -736,6 +736,58 @@ def assemble_input_data(self, data): } return inputs + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Notes + ----- + The validating data assembling processing is the same as training data assembling. + + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model validating. + """ + return self.assemble_input_for_training(data) + + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model testing. + """ + indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data + bz, n_steps, n_features = X.shape + lengths = torch.tensor([n_steps] * bz, dtype=torch.float) + times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1) + + X = X.permute(1, 0, 2) + missing_mask = missing_mask.permute(1, 0, 2) + times = times.permute(1, 0) + + inputs = { + "X": X, + "static": None, + "timestamps": times, + "lengths": lengths, + "missing_mask": missing_mask, + } + + return inputs + def classify(self, X): X = self.check_input(self.n_steps, self.n_features, X) self.model.eval() # set the model as eval status to freeze it. @@ -745,26 +797,7 @@ def classify(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - # cannot use input_data_processing, cause here has no label - indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data - # assemble input data - - bz, n_steps, n_features = X.shape - lengths = torch.tensor([n_steps] * bz, dtype=torch.float) - times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1) - - X = X.permute(1, 0, 2) - missing_mask = missing_mask.permute(1, 0, 2) - times = times.permute(1, 0) - - inputs = { - "X": X, - "static": None, - "timestamps": times, - "lengths": lengths, - "missing_mask": missing_mask, - } - + inputs = self.assemble_input_for_testing(data) prediction = self.model.classify(inputs) prediction_collector.append(prediction) diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py index 30f69f49..f3cc8c2e 100644 --- a/pypots/clustering/base.py +++ b/pypots/clustering/base.py @@ -71,7 +71,59 @@ def __init__( self.n_clusters = n_clusters @abstractmethod - def assemble_input_data(self, data): + def assemble_input_for_training(self, data) -> dict: + """Assemble the given data into a dictionary for training input. + + Parameters + ---------- + data : list, + Input data from dataloader, should be list. + + Returns + ------- + dict, + A python dictionary contains the input data for model training. + """ + pass + + @abstractmethod + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Parameters + ---------- + data : list, + Data output from dataloader, should be list. + + Returns + ------- + dict, + A python dictionary contains the input data for model validating. 
+        """
+        pass
+
+    @abstractmethod
+    def assemble_input_for_testing(self, data) -> dict:
+        """Assemble the given data into a dictionary for testing input.
+
+        Notes
+        -----
+        The processing functions of train/val/test stages are separated for the situation where the inputs of
+        the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+        used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+        classification task contain labels, but the testing data (from the production environment) generally
+        doesn't have labels.
+
+        Parameters
+        ----------
+        data : list,
+            Data output from dataloader, should be list.
+
+        Returns
+        -------
+        dict,
+            A python dictionary contains the input data for model testing.
+        """
         pass
 
     def _train_model(self, training_loader, val_loader=None):
@@ -88,7 +140,7 @@ def _train_model(self, training_loader, val_loader=None):
             self.model.train()
             epoch_train_loss_collector = []
             for idx, data in enumerate(training_loader):
-                inputs = self.assemble_input_data(data)
+                inputs = self.assemble_input_for_training(data)
                 self.optimizer.zero_grad()
                 results = self.model.forward(inputs)
                 results["loss"].backward()
@@ -105,7 +157,7 @@ def _train_model(self, training_loader, val_loader=None):
                 epoch_val_loss_collector = []
                 with torch.no_grad():
                     for idx, data in enumerate(val_loader):
-                        inputs = self.assemble_input_data(data)
+                        inputs = self.assemble_input_for_validating(data)
                         results = self.model.forward(inputs)
                         epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/clustering/crli.py b/pypots/clustering/crli.py
index f6305a41..b0bd9723 100644
--- a/pypots/clustering/crli.py
+++ b/pypots/clustering/crli.py
@@ -363,19 +363,20 @@ def fit(self, train_X):
         self.model.eval()  # set the model as eval status to freeze it.
         return self
 
-    def assemble_input_data(self, data):
-        """Assemble the input data into a dictionary.
+    def assemble_input_for_training(self, data):
+        """Assemble the given data into a dictionary for training input.
 
         Parameters
         ----------
-        data : list
-            A list containing data fetched from Dataset by Dataload.
+        data : list,
+            A list containing data fetched from Dataset by Dataloader.
 
         Returns
        -------
-        inputs : dict
-            A dictionary with data assembled.
+        inputs : dict,
+            A python dictionary contains the input data for model training.
         """
+
         # fetch data
         indices, X, _, missing_mask, _, _ = data
 
         inputs = {
             "X": X,
             "missing_mask": missing_mask,
         }
+
         return inputs
 
+    def assemble_input_for_validating(self, data) -> dict:
+        """Assemble the given data into a dictionary for validating input.
+
+        Notes
+        -----
+        The validating data assembling processing is the same as training data assembling.
+
+
+        Parameters
+        ----------
+        data : list,
+            A list containing data fetched from Dataset by Dataloader.
+
+        Returns
+        -------
+        inputs : dict,
+            A python dictionary contains the input data for model validating.
+        """
+        return self.assemble_input_for_training(data)
+
+    def assemble_input_for_testing(self, data) -> dict:
+        """Assemble the given data into a dictionary for testing input.
+
+        Notes
+        -----
+        The testing data assembling processing is the same as training data assembling.
+
+        Parameters
+        ----------
+        data : list,
+            A list containing data fetched from Dataset by Dataloader.
+
+        Returns
+        -------
+        inputs : dict,
+            A python dictionary contains the input data for model testing.
+ """ + return self.assemble_input_for_training(data) + def _train_model(self, training_loader, val_loader=None): self.G_optimizer = torch.optim.Adam( [ @@ -410,7 +451,7 @@ def _train_model(self, training_loader, val_loader=None): epoch_train_loss_G_collector = [] epoch_train_loss_D_collector = [] for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_training(data) for _ in range(self.D_steps): self.D_optimizer.zero_grad() @@ -483,7 +524,7 @@ def cluster(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_testing(data) inputs = self.model.cluster(inputs) latent_collector.append(inputs["fcn_latent"]) diff --git a/pypots/clustering/vader.py b/pypots/clustering/vader.py index 7922daac..14f682fe 100644 --- a/pypots/clustering/vader.py +++ b/pypots/clustering/vader.py @@ -389,19 +389,20 @@ def fit(self, train_X): self.model.eval() # set the model as eval status to freeze it. return self - def assemble_input_data(self, data): - """Assemble the input data into a dictionary. + def assemble_input_for_training(self, data): + """Assemble the given data into a dictionary for training input. Parameters ---------- - data : list - A list containing data fetched from Dataset by Dataload. + data : list, + A list containing data fetched from Dataset by Dataloader. Returns ------- - inputs : dict - A dictionary with data assembled. + inputs : dict, + A python dictionary contains the input data for model training. """ + # fetch data indices, X, _, missing_mask, _, _ = data @@ -409,8 +410,48 @@ def assemble_input_data(self, data): "X": X, "missing_mask": missing_mask, } + return inputs + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Notes + ----- + The validating data assembling processing is the same as training data assembling. + + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model validating. + """ + return self.assemble_input_for_training(data) + + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. + + Notes + ----- + The testing data assembling processing is the same as training data assembling. + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model testing. 
+ """ + return self.assemble_input_for_training(data) + def _train_model(self, training_loader, val_loader=None): self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay @@ -424,7 +465,7 @@ def _train_model(self, training_loader, val_loader=None): for epoch in range(self.pretrain_epochs): self.model.train() for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_training(data) self.optimizer.zero_grad() results = self.model.forward(inputs, pretrain=True) results["loss"].backward() @@ -433,7 +474,7 @@ def _train_model(self, training_loader, val_loader=None): sample_collector = [] for _ in range(10): # sampling 10 times for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_validating(data) results = self.model.forward(inputs, pretrain=True) sample_collector.append(results["z"]) samples = torch.cat(sample_collector).cpu().detach().numpy() @@ -456,7 +497,7 @@ def _train_model(self, training_loader, val_loader=None): self.model.train() epoch_train_loss_collector = [] for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_training(data) self.optimizer.zero_grad() results = self.model.forward(inputs) results["loss"].backward() @@ -473,7 +514,7 @@ def _train_model(self, training_loader, val_loader=None): epoch_val_loss_collector = [] with torch.no_grad(): for idx, data in enumerate(val_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_validating(data) results = self.model.forward(inputs) epoch_val_loss_collector.append(results["loss"].item()) @@ -525,7 +566,7 @@ def cluster(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_testing(data) results = self.model.cluster(inputs) clustering_results_collector.append(results) diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py index 64beadde..282b0336 100644 --- a/pypots/forecasting/base.py +++ b/pypots/forecasting/base.py @@ -14,6 +14,7 @@ from pypots.base import BaseModel, BaseNNModel from pypots.utils.logging import logger + class BaseForecaster(BaseModel): """Abstract class for all forecasting models.""" @@ -62,7 +63,59 @@ def __init__( ) @abstractmethod - def assemble_input_data(self, data): + def assemble_input_for_training(self, data) -> dict: + """Assemble the given data into a dictionary for training input. + + Parameters + ---------- + data : list, + Input data from dataloader, should be list. + + Returns + ------- + dict, + A python dictionary contains the input data for model training. + """ + pass + + @abstractmethod + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Parameters + ---------- + data : list, + Data output from dataloader, should be list. + + Returns + ------- + dict, + A python dictionary contains the input data for model validating. + """ + pass + + @abstractmethod + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. 
+
+ Notes
+ -----
+ The processing functions of the train/val/test stages are separated because the inputs of the
+ three stages may differ, which usually happens when the Dataset/Dataloader classes used in the
+ train/val/test stages are not the same, e.g. the training and validating data in a
+ classification task contain labels, but the testing data (from the production environment)
+ generally does not have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from the dataloader; should be a list.
+
+ Returns
+ -------
+ dict,
+ A Python dictionary containing the input data for model testing.
+ """
 pass

 def _train_model(self, training_loader, val_loader=None):
@@ -79,7 +132,7 @@ def _train_model(self, training_loader, val_loader=None):
 self.model.train()
 epoch_train_loss_collector = []
 for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
 self.optimizer.zero_grad()
 results = self.model.forward(inputs)
 results["loss"].backward()
@@ -96,7 +149,7 @@ def _train_model(self, training_loader, val_loader=None):
 epoch_val_loss_collector = []
 with torch.no_grad():
 for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
 results = self.model.forward(inputs)
 epoch_val_loss_collector.append(results["loss"].item())

diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py
index a7290e14..e62ae50c 100644
--- a/pypots/imputation/base.py
+++ b/pypots/imputation/base.py
@@ -12,8 +12,8 @@
 import torch

 from pypots.base import BaseModel, BaseNNModel
-from pypots.utils.metrics import cal_mae
 from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae

 try:
 import nni
@@ -71,7 +71,59 @@ def __init__(
 )

 @abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from the dataloader; should be a list.
+
+ Returns
+ -------
+ dict,
+ A Python dictionary containing the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from the dataloader; should be a list.
+
+ Returns
+ -------
+ dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of the train/val/test stages are separated because the inputs of the
+ three stages may differ, which usually happens when the Dataset/Dataloader classes used in the
+ train/val/test stages are not the same, e.g. the training and validating data in a
+ classification task contain labels, but the testing data (from the production environment)
+ generally does not have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from the dataloader; should be a list.
+
+ Returns
+ -------
+ dict,
+ A Python dictionary containing the input data for model testing.
+ """ pass def _train_model( @@ -94,7 +146,7 @@ def _train_model( self.model.train() epoch_train_loss_collector = [] for idx, data in enumerate(training_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_training(data) self.optimizer.zero_grad() results = self.model.forward(inputs) results["loss"].backward() @@ -111,7 +163,7 @@ def _train_model( imputation_collector = [] with torch.no_grad(): for idx, data in enumerate(val_loader): - inputs = self.assemble_input_data(data) + inputs = self.assemble_input_for_validating(data) results = self.model.forward(inputs) imputation_collector.append(results["imputed_data"]) diff --git a/pypots/imputation/brits.py b/pypots/imputation/brits.py index 46587d81..d15c8e33 100644 --- a/pypots/imputation/brits.py +++ b/pypots/imputation/brits.py @@ -537,25 +537,31 @@ def fit(self, train_X, val_X=None): self.model.eval() # set the model as eval status to freeze it. return self - def assemble_input_data(self, data): - """Assemble the input data into a dictionary. + def assemble_input_for_training(self, data): + """Assemble the given data into a dictionary for training input. Parameters ---------- - data : list - A list containing data fetched from Dataset by Dataload. + data : list, + A list containing data fetched from Dataset by Dataloader. Returns ------- - inputs : dict - A dictionary with data assembled. + inputs : dict, + A python dictionary contains the input data for model training. """ + # fetch data indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data + # assemble input data inputs = { "indices": indices, - "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, + "forward": { + "X": X, + "missing_mask": missing_mask, + "deltas": deltas, + }, "backward": { "X": back_X, "missing_mask": back_missing_mask, @@ -565,6 +571,45 @@ def assemble_input_data(self, data): return inputs + def assemble_input_for_validating(self, data) -> dict: + """Assemble the given data into a dictionary for validating input. + + Notes + ----- + The validating data assembling processing is the same as training data assembling. + + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model validating. + """ + return self.assemble_input_for_training(data) + + def assemble_input_for_testing(self, data) -> dict: + """Assemble the given data into a dictionary for testing input. + + Notes + ----- + The testing data assembling processing is the same as training data assembling. + + Parameters + ---------- + data : list, + A list containing data fetched from Dataset by Dataloader. + + Returns + ------- + inputs : dict, + A python dictionary contains the input data for model testing. + """ + return self.assemble_input_for_training(data) + def impute(self, X): X = self.check_input(self.n_steps, self.n_features, X) self.model.eval() # set the model as eval status to freeze it. 
@@ -574,7 +619,7 @@ def impute(self, X):

 with torch.no_grad():
 for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
 imputed_data = self.model.impute(inputs)
 imputation_collector.append(imputed_data)

diff --git a/pypots/imputation/saits.py b/pypots/imputation/saits.py
index 3badbbbe..d32bd0ab 100644
--- a/pypots/imputation/saits.py
+++ b/pypots/imputation/saits.py
@@ -239,19 +239,20 @@ def fit(self, train_X, val_X=None):
 self.model.load_state_dict(self.best_model_dict)
 self.model.eval() # set the model as eval status to freeze it.

- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.

 Parameters
 ----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.

 Returns
 -------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
 """
+
 indices, X_intact, X, missing_mask, indicating_mask = data

 inputs = {
@@ -263,6 +264,45 @@ def assemble_input_data(self, data):

 return inputs

+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ Validating data is assembled in the same way as training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ Testing data is assembled in the same way as training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
 def impute(self, X):
 X = self.check_input(self.n_steps, self.n_features, X)
 self.model.eval() # set the model as eval status to freeze it.

diff --git a/pypots/imputation/transformer.py b/pypots/imputation/transformer.py
index 8146a266..c84c30b1 100644
--- a/pypots/imputation/transformer.py
+++ b/pypots/imputation/transformer.py
@@ -330,18 +330,18 @@ def fit(self, train_X, val_X=None):
 self.model.eval() # set the model as eval status to freeze it.
 return self

- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.

 Parameters
 ----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.

 Returns
 -------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
 """
 indices, X_intact, X, missing_mask, indicating_mask = data

@@ -355,6 +355,45 @@ def assemble_input_data(self, data):

 return inputs

+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ Validating data is assembled in the same way as training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ Testing data is assembled in the same way as training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
 def impute(self, X):
 X = self.check_input(self.n_steps, self.n_features, X)
 self.model.eval() # set the model as eval status to freeze it.
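
Note for reviewers: the hunks above replace the single assemble_input_data hook
with three stage-specific hooks. Below is a minimal sketch of the contract a
model subclass now fulfils; the class name and the three-field batch layout are
illustrative assumptions made for this sketch, not code taken from PyPOTS.

class ToyModel:
    """Illustrative subclass showing the three stage-specific hooks."""

    def assemble_input_for_training(self, data) -> dict:
        # `data` is the list yielded by the Dataloader; this three-field
        # layout is assumed for the sketch only.
        indices, X, missing_mask = data
        return {"indices": indices, "X": X, "missing_mask": missing_mask}

    def assemble_input_for_validating(self, data) -> dict:
        # Validating data is assembled in the same way as training data.
        return self.assemble_input_for_training(data)

    def assemble_input_for_testing(self, data) -> dict:
        # A separate hook exists because testing batches (e.g. from a
        # production environment) may lack labels; here it happens to
        # share the training layout.
        return self.assemble_input_for_training(data)

Keeping the validating/testing hooks as thin delegates preserves the old
single-hook behaviour while letting future models override each stage
independently.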