diff --git a/README.md b/README.md
index 7559a304..686b3042 100644
--- a/README.md
+++ b/README.md
@@ -5,12 +5,16 @@
-
+
-
+
-
+
+
+
+
+
@@ -26,7 +30,7 @@
-
+
@@ -53,6 +57,10 @@ Visit [TSDB](https://github.com/WenjieDu/TSDB) right now to know more about this
## ❖ Installation
+PyPOTS is now available on conda-forge ❗️
+
+Install it with `conda install pypots`; you may need to specify the channel with the option `-c conda-forge`
+
Install the latest release from PyPI:
> pip install pypots
@@ -121,16 +129,16 @@ Thank you all for your attention! 😃
[^1]: Du, W., Cote, D., & Liu, Y. (2023). [SAITS: Self-Attention-based Imputation for Time Series](https://doi.org/10.1016/j.eswa.2023.119619). *Expert systems with applications*.
-[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS* 2017.
-[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS* 2018.
+[^2]: Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). [Attention is All you Need](https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html). *NeurIPS 2017*.
+[^3]: Cao, W., Wang, D., Li, J., Zhou, H., Li, L., & Li, Y. (2018). [BRITS: Bidirectional Recurrent Imputation for Time Series](https://papers.nips.cc/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html). *NeurIPS 2018*.
[^4]: Che, Z., Purushotham, S., Cho, K., Sontag, D.A., & Liu, Y. (2018). [Recurrent Neural Networks for Multivariate Time Series with Missing Values](https://www.nature.com/articles/s41598-018-24271-9). *Scientific Reports*.
-[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR* 2022.
-[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI* 2021.
+[^5]: Zhang, X., Zeman, M., Tsiligkaridis, T., & Zitnik, M. (2022). [Graph-Guided Network for Irregularly Sampled Multivariate Time Series](https://arxiv.org/abs/2110.05357). *ICLR 2022*.
+[^6]: Ma, Q., Chen, C., Li, S., & Cottrell, G. W. (2021). [Learning Representations for Incomplete Time Series Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/17070). *AAAI 2021*.
[^7]: Jong, J.D., Emon, M.A., Wu, P., Karki, R., Sood, M., Godard, P., Ahmad, A., Vrooman, H.A., Hofmann-Apitius, M., & Fröhlich, H. (2019). [Deep learning for clustering of multivariate clinical patient trajectories with missing values](https://academic.oup.com/gigascience/article/8/11/giz134/5626377). *GigaScience*.
[^8]: Chen, X., & Sun, L. (2021). [Bayesian Temporal Factorization for Multidimensional Time Series Prediction](https://arxiv.org/abs/1910.06366). *IEEE transactions on pattern analysis and machine intelligence*.
🏠 Visits
-
+
diff --git a/docs/index.rst b/docs/index.rst
index 07a62304..435125bf 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -97,8 +97,8 @@ Below is an example applying SAITS in PyPOTS to impute missing values in the dat
============================== ================ ========================================================================= ====== =========
Task Type Algorithm Year Reference
============================== ================ ========================================================================= ====== =========
-Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2022SAITS`
-Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2022SAITS`
+Imputation Neural Network SAITS (Self-Attention-based Imputation for Time Series) 2022 :cite:`du2023SAITS`
+Imputation Neural Network Transformer 2017 :cite:`vaswani2017Transformer`, :cite:`du2023SAITS`
Imputation, Classification Neural Network BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS`
Imputation Naive LOCF (Last Observation Carried Forward) / /
Classification Neural Network GRU-D 2018 :cite:`che2018GRUD`
diff --git a/docs/references.bib b/docs/references.bib
index 9a06b474..a0735aa6 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -1,403 +1,400 @@
@article{cao2018BRITS,
- title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}},
- author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan},
- year = {2018},
- month = may,
- journal = {arXiv:1805.10572 [cs, stat]},
- eprint = {1805.10572},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1805.10572},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{BRITS}}: {{Bidirectional Recurrent Imputation}} for {{Time Series}}},
+author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan},
+year = {2018},
+month = may,
+journal = {arXiv:1805.10572 [cs, stat]},
+eprint = {1805.10572},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1805.10572},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{che2018GRUD,
- title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}},
- author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
- year = {2018},
- month = apr,
- journal = {Scientific Reports},
- volume = {8},
- number = {1},
- pages = {6085},
- publisher = {{Nature Publishing Group}},
- issn = {2045-2322},
- doi = {10.1038/s41598-018-24271-9},
- url = {https://www.nature.com/articles/s41598-018-24271-9},
- copyright = {2018 The Author(s)}
+title = {Recurrent {{Neural Networks}} for {{Multivariate Time Series}} with {{Missing Values}}},
+author = {Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
+year = {2018},
+month = apr,
+journal = {Scientific Reports},
+volume = {8},
+number = {1},
+pages = {6085},
+publisher = {{Nature Publishing Group}},
+issn = {2045-2322},
+doi = {10.1038/s41598-018-24271-9},
+url = {https://www.nature.com/articles/s41598-018-24271-9},
+copyright = {2018 The Author(s)}
}
@article{chen2021BTMF,
- title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}},
- author = {Chen, Xinyu and Sun, Lijun},
- year = {2021},
- journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
- eprint = {1910.06366},
- eprinttype = {arxiv},
- pages = {1--1},
- issn = {0162-8828, 2160-9292, 1939-3539},
- doi = {10.1109/TPAMI.2021.3066551},
- url = {http://arxiv.org/abs/1910.06366},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Bayesian {{Temporal Factorization}} for {{Multidimensional Time Series Prediction}}},
+author = {Chen, Xinyu and Sun, Lijun},
+year = {2021},
+journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+eprint = {1910.06366},
+eprinttype = {arxiv},
+pages = {1--1},
+issn = {0162-8828, 2160-9292, 1939-3539},
+doi = {10.1109/TPAMI.2021.3066551},
+url = {http://arxiv.org/abs/1910.06366},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{choi2020RDISRandom,
- title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}},
- author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan},
- year = {2020},
- month = oct,
- journal = {arXiv:2010.10075 [cs, stat]},
- eprint = {2010.10075},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/2010.10075},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{RDIS}}: {{Random Drop Imputation}} with {{Self-Training}} for {{Incomplete Time Series Data}}},
+author = {Choi, Tae-Min and Kang, Ji-Su and Kim, Jong-Hwan},
+year = {2020},
+month = oct,
+journal = {arXiv:2010.10075 [cs, stat]},
+eprint = {2010.10075},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/2010.10075},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{cini2021MultivariateTime,
- title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}},
- author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare},
- year = {2021},
- month = sep,
- journal = {arXiv:2108.00298 [cs]},
- eprint = {2108.00298},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2108.00298},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Multivariate {{Time Series Imputation}} by {{Graph Neural Networks}}},
+author = {Cini, Andrea and Marisca, Ivan and Alippi, Cesare},
+year = {2021},
+month = sep,
+journal = {arXiv:2108.00298 [cs]},
+eprint = {2108.00298},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2108.00298},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
@inproceedings{costa2018MissingData,
- title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}},
- booktitle = {Advances in {{Intelligent Data Analysis XVII}}},
- author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques},
- editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, Antti},
- year = {2018},
- series = {Lecture {{Notes}} in {{Computer Science}}},
- pages = {87--98},
- publisher = {{Springer International Publishing}},
- address = {{Cham}},
- doi = {10.1007/978-3-030-01768-2_8},
- isbn = {978-3-030-01768-2},
- keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
+title = {Missing {{Data Imputation}} via {{Denoising Autoencoders}}: {{The Untold Story}}},
+booktitle = {Advances in {{Intelligent Data Analysis XVII}}},
+author = {Costa, Adriana Fonseca and Santos, Miriam Seoane and Soares, Jastin Pompeu and Abreu, Pedro Henriques},
+editor = {Duivesteijn, Wouter and Siebes, Arno and Ukkonen, Antti},
+year = {2018},
+series = {Lecture {{Notes}} in {{Computer Science}}},
+pages = {87--98},
+publisher = {{Springer International Publishing}},
+address = {{Cham}},
+doi = {10.1007/978-3-030-01768-2_8},
+isbn = {978-3-030-01768-2},
+keywords = {Data imputation,Denoising autoencoders,Missing data,Missing mechanisms}
}
@article{dejong2019VaDER,
- title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
- author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
- year = {2019},
- month = nov,
- journal = {GigaScience},
- volume = {8},
- number = {11},
- pages = {giz134},
- issn = {2047-217X},
- doi = {10.1093/gigascience/giz134},
- url = {https://doi.org/10.1093/gigascience/giz134}
+title = {Deep Learning for Clustering of Multivariate Clinical Patient Trajectories with Missing Values},
+author = {{de~Jong}, Johann and Emon, Mohammad Asif and Wu, Ping and Karki, Reagon and Sood, Meemansa and Godard, Patrice and Ahmad, Ashar and Vrooman, Henri and {Hofmann-Apitius}, Martin and Fr{\"o}hlich, Holger},
+year = {2019},
+month = nov,
+journal = {GigaScience},
+volume = {8},
+number = {11},
+pages = {giz134},
+issn = {2047-217X},
+doi = {10.1093/gigascience/giz134},
+url = {https://doi.org/10.1093/gigascience/giz134}
}
-@article{du2022SAITS,
- title = {{{SAITS}}: {{Self-Attention-based Imputation}} for {{Time Series}}},
- author = {Du, Wenjie and C{\^o}t{\'e}, David and Liu, Yan},
- year = {2022},
- month = feb,
- journal = {arXiv:2202.08516 [cs]},
- eprint = {2202.08516},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2202.08516},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning}
+@article{du2023SAITS,
+title = {{SAITS: Self-Attention-based Imputation for Time Series}},
+journal = {Expert Systems with Applications},
+volume = {219},
+pages = {119619},
+year = {2023},
+issn = {0957-4174},
+doi = {https://doi.org/10.1016/j.eswa.2023.119619},
+url = {https://www.sciencedirect.com/science/article/pii/S0957417423001203},
+author = {Wenjie Du and David C{\^o}t{\'e} and Yan Liu},
}
-
@article{fortuin2020GPVAEDeep,
- title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
- author = {Fortuin, Vincent and Baranchuk, Dmitry and R{\"a}tsch, Gunnar and Mandt, Stephan},
- year = {2020},
- month = feb,
- journal = {arXiv:1907.04155 [cs, stat]},
- eprint = {1907.04155},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1907.04155},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{GP-VAE}}: {{Deep Probabilistic Time Series Imputation}}},
+author = {Fortuin, Vincent and Baranchuk, Dmitry and R{\"a}tsch, Gunnar and Mandt, Stephan},
+year = {2020},
+month = feb,
+journal = {arXiv:1907.04155 [cs, stat]},
+eprint = {1907.04155},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1907.04155},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{horn2019SeFT,
- title = {Set {{Functions}} for {{Time Series}}},
- author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten},
- year = {2019},
- month = sep,
- url = {https://arxiv.org/abs/1909.12064v3}
+title = {Set {{Functions}} for {{Time Series}}},
+author = {Horn, Max and Moor, Michael and Bock, Christian and Rieck, Bastian and Borgwardt, Karsten},
+year = {2019},
+month = sep,
+url = {https://arxiv.org/abs/1909.12064v3}
}
@article{hubert1985AdjustedRI,
- title = {Comparing Partitions},
- author = {Hubert, Lawrence and Arabie, Phipps},
- year = {1985},
- month = dec,
- journal = {Journal of Classification},
- volume = {2},
- number = {1},
- pages = {193--218},
- issn = {1432-1343},
- doi = {10.1007/BF01908075},
- url = {https://doi.org/10.1007/BF01908075},
- keywords = {Consensus indices,Measures of agreement,Measures of association}
+title = {Comparing Partitions},
+author = {Hubert, Lawrence and Arabie, Phipps},
+year = {1985},
+month = dec,
+journal = {Journal of Classification},
+volume = {2},
+number = {1},
+pages = {193--218},
+issn = {1432-1343},
+doi = {10.1007/BF01908075},
+url = {https://doi.org/10.1007/BF01908075},
+keywords = {Consensus indices,Measures of agreement,Measures of association}
}
@article{little1988TestMCAR,
- title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}},
- author = {Little, Roderick J. A.},
- year = {1988},
- journal = {Journal of the American Statistical Association},
- volume = {83},
- number = {404},
- pages = {1198--1202},
- publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
- issn = {0162-1459},
- doi = {10.2307/2290157},
- url = {https://www.jstor.org/stable/2290157}
+title = {A {{Test}} of {{Missing Completely}} at {{Random}} for {{Multivariate Data}} with {{Missing Values}}},
+author = {Little, Roderick J. A.},
+year = {1988},
+journal = {Journal of the American Statistical Association},
+volume = {83},
+number = {404},
+pages = {1198--1202},
+publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
+issn = {0162-1459},
+doi = {10.2307/2290157},
+url = {https://www.jstor.org/stable/2290157}
}
@inproceedings{liu2019NAOMI,
- title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}},
- booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]},
- author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong},
- year = {2019},
- month = oct,
- eprint = {1901.10946},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1901.10946},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{NAOMI}}: {{Non-Autoregressive Multiresolution Sequence Imputation}}},
+booktitle = {{{arXiv}}:1901.10946 [Cs, Stat]},
+author = {Liu, Yukai and Yu, Rose and Zheng, Stephan and Zhan, Eric and Yue, Yisong},
+year = {2019},
+month = oct,
+eprint = {1901.10946},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1901.10946},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@incollection{luo2018MultivariateTime,
- title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}},
- booktitle = {Advances in {{Neural Information Processing Systems}} 31},
- author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan},
- editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.},
- year = {2018},
- pages = {1596--1607},
- publisher = {{Curran Associates, Inc.}},
- url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf}
+title = {Multivariate {{Time Series Imputation}} with {{Generative Adversarial Networks}}},
+booktitle = {Advances in {{Neural Information Processing Systems}} 31},
+author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and {xiaojie}, Yuan},
+editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.},
+year = {2018},
+pages = {1596--1607},
+publisher = {{Curran Associates, Inc.}},
+url = {http://papers.nips.cc/paper/7432-multivariate-time-series-imputation-with-generative-adversarial-networks.pdf}
}
@article{ma2019CDSA,
- title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}},
- author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu},
- year = {2019},
- month = aug,
- journal = {arXiv:1905.09904 [cs, stat]},
- eprint = {1905.09904},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1905.09904},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{CDSA}}: {{Cross-Dimensional Self-Attention}} for {{Multivariate}}, {{Geo-tagged Time Series Imputation}}},
+author = {Ma, Jiawei and Shou, Zheng and Zareian, Alireza and Mansour, Hassan and Vetro, Anthony and Chang, Shih-Fu},
+year = {2019},
+month = aug,
+journal = {arXiv:1905.09904 [cs, stat]},
+eprint = {1905.09904},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1905.09904},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{ma2021CRLI,
- title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}},
- author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.},
- year = {2021},
- month = may,
- journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
- volume = {35},
- number = {10},
- pages = {8837--8846},
- issn = {2374-3468},
- url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070},
- copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
- keywords = {Time-Series/Data Streams}
+title = {Learning {{Representations}} for {{Incomplete Time Series Clustering}}},
+author = {Ma, Qianli and Chen, Chuxin and Li, Sen and Cottrell, Garrison W.},
+year = {2021},
+month = may,
+journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
+volume = {35},
+number = {10},
+pages = {8837--8846},
+issn = {2374-3468},
+url = {https://ojs.aaai.org/index.php/AAAI/article/view/17070},
+copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
+keywords = {Time-Series/Data Streams}
}
@article{miao2021SSGAN,
- title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}},
- author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei},
- year = {2021},
- month = may,
- journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
- volume = {35},
- number = {10},
- pages = {8983--8991},
- issn = {2374-3468},
- url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086},
- copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
- keywords = {Time-Series/Data Streams}
+title = {Generative {{Semi-supervised Learning}} for {{Multivariate Time Series Imputation}}},
+author = {Miao, Xiaoye and Wu, Yangyang and Wang, Jun and Gao, Yunjun and Mao, Xudong and Yin, Jianwei},
+year = {2021},
+month = may,
+journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
+volume = {35},
+number = {10},
+pages = {8983--8991},
+issn = {2374-3468},
+url = {https://ojs.aaai.org/index.php/AAAI/article/view/17086},
+copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
+keywords = {Time-Series/Data Streams}
}
@article{mikalsen2017TimeSeries,
- title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}},
- author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert},
- year = {2017},
- month = jun,
- journal = {arXiv:1704.00794 [cs, stat]},
- eprint = {1704.00794},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1704.00794},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Time {{Series Cluster Kernel}} for {{Learning Similarities}} between {{Multivariate Time Series}} with {{Missing Data}}},
+author = {Mikalsen, Karl {\O}yvind and Bianchi, Filippo Maria and {Soguero-Ruiz}, Cristina and Jenssen, Robert},
+year = {2017},
+month = jun,
+journal = {arXiv:1704.00794 [cs, stat]},
+eprint = {1704.00794},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1704.00794},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@inproceedings{oh2021STINGSelfattention,
- title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}},
- booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})},
- author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil},
- year = {2021},
- month = dec,
- pages = {1264--1269},
- issn = {2374-8486},
- doi = {10.1109/ICDM51629.2021.00155},
- keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation}
+title = {{{STING}}: {{Self-attention}} Based {{Time-series Imputation Networks}} Using {{GAN}}},
+booktitle = {2021 {{IEEE International Conference}} on {{Data Mining}} ({{ICDM}})},
+author = {Oh, Eunkyu and Kim, Taehun and Ji, Yunhu and Khyalia, Sushil},
+year = {2021},
+month = dec,
+pages = {1264--1269},
+issn = {2374-8486},
+doi = {10.1109/ICDM51629.2021.00155},
+keywords = {bidirectional RNN,Conferences,Correlation,Data collection,Deep learning,generative adversarial networks,Generative adversarial networks,Recurrent neural networks,self-attention,Time series analysis,time-series imputation}
}
@article{oyvindmikalsen2021TimeSeries,
- title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information},
- author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert},
- year = {2021},
- month = jul,
- journal = {Pattern Recognition},
- volume = {115},
- pages = {107896},
- issn = {0031-3203},
- doi = {10.1016/j.patcog.2021.107896},
- url = {https://www.sciencedirect.com/science/article/pii/S0031320321000832},
- keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning}
+title = {Time Series Cluster Kernels to Exploit Informative Missingness and Incomplete Label Information},
+author = {{\O}yvind Mikalsen, Karl and {Soguero-Ruiz}, Cristina and Maria Bianchi, Filippo and Revhaug, Arthur and Jenssen, Robert},
+year = {2021},
+month = jul,
+journal = {Pattern Recognition},
+volume = {115},
+pages = {107896},
+issn = {0031-3203},
+doi = {10.1016/j.patcog.2021.107896},
+url = {https://www.sciencedirect.com/science/article/pii/S0031320321000832},
+keywords = {Informative missingness,Kernel methods,Missing data,Multivariate time series,Semi-supervised learning}
}
@article{rand1971RandIndex,
- title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}},
- author = {Rand, William M.},
- year = {1971},
- journal = {Journal of the American Statistical Association},
- volume = {66},
- number = {336},
- pages = {846--850},
- publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
- issn = {0162-1459},
- doi = {10.2307/2284239},
- url = {https://www.jstor.org/stable/2284239}
+title = {Objective {{Criteria}} for the {{Evaluation}} of {{Clustering Methods}}},
+author = {Rand, William M.},
+year = {1971},
+journal = {Journal of the American Statistical Association},
+volume = {66},
+number = {336},
+pages = {846--850},
+publisher = {{[American Statistical Association, Taylor \& Francis, Ltd.]}},
+issn = {0162-1459},
+doi = {10.2307/2284239},
+url = {https://www.jstor.org/stable/2284239}
}
@article{shukla2021MultiTimeAttention,
- title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}},
- author = {Shukla, Satya Narayan and Marlin, Benjamin M.},
- year = {2021},
- month = jun,
- journal = {arXiv:2101.10318 [cs]},
- eprint = {2101.10318},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2101.10318},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Multi-{{Time Attention Networks}} for {{Irregularly Sampled Time Series}}},
+author = {Shukla, Satya Narayan and Marlin, Benjamin M.},
+year = {2021},
+month = jun,
+journal = {arXiv:2101.10318 [cs]},
+eprint = {2101.10318},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2101.10318},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
@inproceedings{suo2020GLIMAGlobal,
- title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}},
- booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
- author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong},
- year = {2020},
- month = dec,
- pages = {798--807},
- doi = {10.1109/BigData50022.2020.9378408},
- keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis}
+title = {{{GLIMA}}: {{Global}} and {{Local Time Series Imputation}} with {{Multi-directional Attention Learning}}},
+booktitle = {2020 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
+author = {Suo, Qiuling and Zhong, Weida and Xun, Guangxu and Sun, Jianhui and Chen, Changyou and Zhang, Aidong},
+year = {2020},
+month = dec,
+pages = {798--807},
+doi = {10.1109/BigData50022.2020.9378408},
+keywords = {Big Data,Conferences,Correlation,Missing Data,Recurrent Imputation,Recurrent neural networks,Self-Attention,Task analysis,Tensors,Time Series,Time series analysis}
}
@article{tang2019JointModeling,
- title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}},
- author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang},
- year = {2019},
- month = nov,
- journal = {arXiv:1911.10273 [cs, stat]},
- eprint = {1911.10273},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/1911.10273},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {Joint {{Modeling}} of {{Local}} and {{Global Temporal Dynamics}} for {{Multivariate Time Series Forecasting}} with {{Missing Values}}},
+author = {Tang, Xianfeng and Yao, Huaxiu and Sun, Yiwei and Aggarwal, Charu and Mitra, Prasenjit and Wang, Suhang},
+year = {2019},
+month = nov,
+journal = {arXiv:1911.10273 [cs, stat]},
+eprint = {1911.10273},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/1911.10273},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@article{tashiro2021CSDI,
- title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}},
- author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano},
- year = {2021},
- month = oct,
- journal = {arXiv:2107.03502 [cs, stat]},
- eprint = {2107.03502},
- eprinttype = {arxiv},
- primaryclass = {cs, stat},
- url = {http://arxiv.org/abs/2107.03502},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+title = {{{CSDI}}: {{Conditional Score-based Diffusion Models}} for {{Probabilistic Time Series Imputation}}},
+author = {Tashiro, Yusuke and Song, Jiaming and Song, Yang and Ermon, Stefano},
+year = {2021},
+month = oct,
+journal = {arXiv:2107.03502 [cs, stat]},
+eprint = {2107.03502},
+eprinttype = {arxiv},
+primaryclass = {cs, stat},
+url = {http://arxiv.org/abs/2107.03502},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
}
@inproceedings{vaswani2017Transformer,
- author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
- booktitle = {Advances in Neural Information Processing Systems},
- editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
- pages = {},
- publisher = {Curran Associates, Inc.},
- title = {Attention is All you Need},
- url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
- volume = {30},
- year = {2017}
+author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
+booktitle = {Advances in Neural Information Processing Systems},
+editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
+pages = {},
+publisher = {Curran Associates, Inc.},
+title = {Attention is All you Need},
+url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
+volume = {30},
+year = {2017}
}
@article{wu2015TimeSeries,
- title = {Time {{Series Forecasting}} with {{Missing Values}}},
- author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue},
- year = {2015},
- month = apr,
- journal = {EAI Endorsed Transactions on Cognitive Communications},
- volume = {"1"},
- number = {4},
- issn = {2313-4534},
- url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269}
+title = {Time {{Series Forecasting}} with {{Missing Values}}},
+author = {Wu, Shin-Fu and Chang, Chia-Yung and Lee, Shie-Jue},
+year = {2015},
+month = apr,
+journal = {EAI Endorsed Transactions on Cognitive Communications},
+volume = {"1"},
+number = {4},
+issn = {2313-4534},
+url = {https://eudl.eu/doi/10.4108/icst.iniscom.2015.258269}
}
@article{yoon2017EstimatingMissing,
- title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}},
- author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela},
- year = {2017},
- month = nov,
- journal = {arXiv:1711.08742 [cs]},
- eprint = {1711.08742},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/1711.08742},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Machine Learning}
+title = {Estimating {{Missing Data}} in {{Temporal Data Streams Using Multi-directional Recurrent Neural Networks}}},
+author = {Yoon, Jinsung and Zame, William R. and {van der Schaar}, Mihaela},
+year = {2017},
+month = nov,
+journal = {arXiv:1711.08742 [cs]},
+eprint = {1711.08742},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/1711.08742},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Machine Learning}
}
@article{yuan2019E2GAN,
- title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}},
- author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui},
- year = {2019},
- pages = {3094--3100},
- url = {https://www.ijcai.org/Proceedings/2019/429}
+title = {{{E}}{$^{2}$}{{GAN}}: {{End-to-End Generative Adversarial Network}} for {{Multivariate Time Series Imputation}}},
+author = {Yuan, Xiaojie and Luo, Yonghong and Zhang, Ying and Cai, Xiangrui},
+year = {2019},
+pages = {3094--3100},
+url = {https://www.ijcai.org/Proceedings/2019/429}
}
@article{zhang2022Raindrop,
- title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}},
- author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka},
- year = {2022},
- month = mar,
- journal = {arXiv:2110.05357 [cs]},
- eprint = {2110.05357},
- eprinttype = {arxiv},
- primaryclass = {cs},
- url = {http://arxiv.org/abs/2110.05357},
- archiveprefix = {arXiv},
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
+title = {Graph-{{Guided Network}} for {{Irregularly Sampled Multivariate Time Series}}},
+author = {Zhang, Xiang and Zeman, Marko and Tsiligkaridis, Theodoros and Zitnik, Marinka},
+year = {2022},
+month = mar,
+journal = {arXiv:2110.05357 [cs]},
+eprint = {2110.05357},
+eprinttype = {arxiv},
+primaryclass = {cs},
+url = {http://arxiv.org/abs/2110.05357},
+archiveprefix = {arXiv},
+keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
}
diff --git a/pypots/base.py b/pypots/base.py
index 106f981d..49b1b0c2 100644
--- a/pypots/base.py
+++ b/pypots/base.py
@@ -11,8 +11,8 @@
import numpy as np
import torch
-from pypots.utils.logging import logger
from pypots.utils.files import create_dir_if_not_exist
+from pypots.utils.logging import logger
class BaseModel(ABC):
diff --git a/pypots/classification/base.py b/pypots/classification/base.py
index 54d40889..598902aa 100644
--- a/pypots/classification/base.py
+++ b/pypots/classification/base.py
@@ -77,7 +77,59 @@ def __init__(
self.n_classes = n_classes
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -94,7 +146,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -111,7 +163,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/classification/brits.py b/pypots/classification/brits.py
index f73dbcf5..5ef03860 100644
--- a/pypots/classification/brits.py
+++ b/pypots/classification/brits.py
@@ -219,7 +219,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -248,7 +248,11 @@ def assemble_input_data(self, data):
inputs = {
"indices": indices,
"label": label,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
"backward": {
"X": back_X,
"missing_mask": back_missing_mask,
@@ -257,6 +261,70 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ # fetch data
+ (
+ indices,
+ X,
+ missing_mask,
+ deltas,
+ back_X,
+ back_missing_mask,
+ back_deltas,
+ ) = data
+
+ # assemble input data
+ inputs = {
+ "indices": indices,
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
+ "backward": {
+ "X": back_X,
+ "deltas": back_deltas,
+ "missing_mask": back_missing_mask,
+ },
+ }
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -266,27 +334,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- (
- indices,
- X,
- missing_mask,
- deltas,
- back_X,
- back_missing_mask,
- back_deltas,
- ) = data
- # assemble input data
- inputs = {
- "indices": indices,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
- "backward": {
- "X": back_X,
- "missing_mask": back_missing_mask,
- "deltas": back_deltas,
- },
- }
-
+ inputs = self.assemble_input_for_testing(data)
results, _, _ = self.model.classify(inputs)
prediction_collector.append(results["prediction"])
diff --git a/pypots/classification/grud.py b/pypots/classification/grud.py
index 7b313eb0..69929dcc 100644
--- a/pypots/classification/grud.py
+++ b/pypots/classification/grud.py
@@ -181,7 +181,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -209,6 +209,56 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
+
+ inputs = {
+ "indices": indices,
+ "X": X,
+ "X_filledLOCF": X_filledLOCF,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ "empirical_mean": empirical_mean,
+ }
+
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -218,18 +268,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
- # assemble input data
- inputs = {
- "indices": indices,
- "X": X,
- "X_filledLOCF": X_filledLOCF,
- "missing_mask": missing_mask,
- "deltas": deltas,
- "empirical_mean": empirical_mean,
- }
-
+ inputs = self.assemble_input_for_testing(data)
prediction = self.model.classify(inputs)
prediction_collector.append(prediction)
diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py
index d63f0560..c6204bc5 100644
--- a/pypots/classification/raindrop.py
+++ b/pypots/classification/raindrop.py
@@ -702,7 +702,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data):
"""Assemble the input data into a dictionary.
Parameters
@@ -736,6 +736,58 @@ def assemble_input_data(self, data):
}
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
+ bz, n_steps, n_features = X.shape
+ lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
+ times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1)
+
+ X = X.permute(1, 0, 2)
+ missing_mask = missing_mask.permute(1, 0, 2)
+ times = times.permute(1, 0)
+
+ inputs = {
+ "X": X,
+ "static": None,
+ "timestamps": times,
+ "lengths": lengths,
+ "missing_mask": missing_mask,
+ }
+
+ return inputs
+
def classify(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -745,26 +797,7 @@ def classify(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- # cannot use input_data_processing, cause here has no label
- indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
- # assemble input data
-
- bz, n_steps, n_features = X.shape
- lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
- times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1)
-
- X = X.permute(1, 0, 2)
- missing_mask = missing_mask.permute(1, 0, 2)
- times = times.permute(1, 0)
-
- inputs = {
- "X": X,
- "static": None,
- "timestamps": times,
- "lengths": lengths,
- "missing_mask": missing_mask,
- }
-
+ inputs = self.assemble_input_for_testing(data)
prediction = self.model.classify(inputs)
prediction_collector.append(prediction)
diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py
index 30f69f49..f3cc8c2e 100644
--- a/pypots/clustering/base.py
+++ b/pypots/clustering/base.py
@@ -71,7 +71,59 @@ def __init__(
self.n_clusters = n_clusters
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -88,7 +140,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -105,7 +157,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/clustering/crli.py b/pypots/clustering/crli.py
index f6305a41..b0bd9723 100644
--- a/pypots/clustering/crli.py
+++ b/pypots/clustering/crli.py
@@ -363,19 +363,20 @@ def fit(self, train_X):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, _, missing_mask, _, _ = data
@@ -383,8 +384,48 @@ def assemble_input_data(self, data):
"X": X,
"missing_mask": missing_mask,
}
+
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def _train_model(self, training_loader, val_loader=None):
self.G_optimizer = torch.optim.Adam(
[
@@ -410,7 +451,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_train_loss_G_collector = []
epoch_train_loss_D_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
for _ in range(self.D_steps):
self.D_optimizer.zero_grad()
@@ -483,7 +524,7 @@ def cluster(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
inputs = self.model.cluster(inputs)
latent_collector.append(inputs["fcn_latent"])
diff --git a/pypots/clustering/vader.py b/pypots/clustering/vader.py
index 7922daac..14f682fe 100644
--- a/pypots/clustering/vader.py
+++ b/pypots/clustering/vader.py
@@ -389,19 +389,20 @@ def fit(self, train_X):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, _, missing_mask, _, _ = data
@@ -409,8 +410,48 @@ def assemble_input_data(self, data):
"X": X,
"missing_mask": missing_mask,
}
+
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def _train_model(self, training_loader, val_loader=None):
self.optimizer = torch.optim.Adam(
self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay
@@ -424,7 +465,7 @@ def _train_model(self, training_loader, val_loader=None):
for epoch in range(self.pretrain_epochs):
self.model.train()
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs, pretrain=True)
results["loss"].backward()
@@ -433,7 +474,7 @@ def _train_model(self, training_loader, val_loader=None):
sample_collector = []
for _ in range(10): # sampling 10 times
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs, pretrain=True)
sample_collector.append(results["z"])
samples = torch.cat(sample_collector).cpu().detach().numpy()
@@ -456,7 +497,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -473,7 +514,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
@@ -525,7 +566,7 @@ def cluster(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
results = self.model.cluster(inputs)
clustering_results_collector.append(results)
diff --git a/pypots/data/dataset_for_brits.py b/pypots/data/dataset_for_brits.py
index 0f3ee6a7..087bdba8 100644
--- a/pypots/data/dataset_for_brits.py
+++ b/pypots/data/dataset_for_brits.py
@@ -22,7 +22,7 @@ def parse_delta(missing_mask):
-------
delta, array,
Delta matrix indicates time gaps of missing values.
- Its math definition please refer to :cite:`che2018MissingData`.
+ Its math definition please refer to :cite:`che2018GRUD`.
"""
# missing_mask is from X, and X's shape and type had been checked. So no need to double-check here.
n_samples, n_steps, n_features = missing_mask.shape
diff --git a/pypots/data/dataset_for_mit.py b/pypots/data/dataset_for_mit.py
index b24e3f75..0edd8a88 100644
--- a/pypots/data/dataset_for_mit.py
+++ b/pypots/data/dataset_for_mit.py
@@ -14,7 +14,7 @@
class DatasetForMIT(BaseDataset):
"""Dataset for models that need MIT (masked imputation task) in their training, such as SAITS.
- For more information about MIT, please refer to :cite:`du2022SAITS`.
+ For more information about MIT, please refer to :cite:`du2023SAITS`.
Parameters
----------
diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py
index 64beadde..282b0336 100644
--- a/pypots/forecasting/base.py
+++ b/pypots/forecasting/base.py
@@ -14,6 +14,7 @@
from pypots.base import BaseModel, BaseNNModel
from pypots.utils.logging import logger
+
class BaseForecaster(BaseModel):
"""Abstract class for all forecasting models."""
@@ -62,7 +63,59 @@ def __init__(
)
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(self, training_loader, val_loader=None):
@@ -79,7 +132,7 @@ def _train_model(self, training_loader, val_loader=None):
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -96,7 +149,7 @@ def _train_model(self, training_loader, val_loader=None):
epoch_val_loss_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results["loss"].item())
diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py
index a7290e14..e62ae50c 100644
--- a/pypots/imputation/base.py
+++ b/pypots/imputation/base.py
@@ -12,8 +12,8 @@
import torch
from pypots.base import BaseModel, BaseNNModel
-from pypots.utils.metrics import cal_mae
from pypots.utils.logging import logger
+from pypots.utils.metrics import cal_mae
try:
import nni
@@ -71,7 +71,59 @@ def __init__(
)
@abstractmethod
- def assemble_input_data(self, data):
+ def assemble_input_for_training(self, data) -> dict:
+ """Assemble the given data into a dictionary for training input.
+
+ Parameters
+ ----------
+ data : list,
+ Input data from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model training.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model validating.
+ """
+ pass
+
+ @abstractmethod
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The processing functions of train/val/test stages are separated for the situation that the input of
+ the three stages are different, and this situation usually happens when the Dataset/Dataloader classes
+ used in the train/val/test stages are not the same, e.g. the training data and validating data in a
+ classification task contains labels, but the testing data (from the production environment) generally
+ doesn't have labels.
+
+ Parameters
+ ----------
+ data : list,
+ Data output from dataloader, should be list.
+
+ Returns
+ -------
+ dict,
+ A python dictionary contains the input data for model testing.
+ """
pass
def _train_model(
@@ -94,7 +146,7 @@ def _train_model(
self.model.train()
epoch_train_loss_collector = []
for idx, data in enumerate(training_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_training(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results["loss"].backward()
@@ -111,7 +163,7 @@ def _train_model(
imputation_collector = []
with torch.no_grad():
for idx, data in enumerate(val_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_validating(data)
results = self.model.forward(inputs)
imputation_collector.append(results["imputed_data"])
diff --git a/pypots/imputation/brits.py b/pypots/imputation/brits.py
index 46587d81..d15c8e33 100644
--- a/pypots/imputation/brits.py
+++ b/pypots/imputation/brits.py
@@ -537,25 +537,31 @@ def fit(self, train_X, val_X=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A python dictionary contains the input data for model training.
"""
+
# fetch data
indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data
+
# assemble input data
inputs = {
"indices": indices,
- "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas},
+ "forward": {
+ "X": X,
+ "missing_mask": missing_mask,
+ "deltas": deltas,
+ },
"backward": {
"X": back_X,
"missing_mask": back_missing_mask,
@@ -565,6 +571,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The validating data assembling processing is the same as training data assembling.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The testing data assembling processing is the same as training data assembling.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A python dictionary contains the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
@@ -574,7 +619,7 @@ def impute(self, X):
with torch.no_grad():
for idx, data in enumerate(test_loader):
- inputs = self.assemble_input_data(data)
+ inputs = self.assemble_input_for_testing(data)
imputed_data = self.model.impute(inputs)
imputation_collector.append(imputed_data)
diff --git a/pypots/imputation/saits.py b/pypots/imputation/saits.py
index 3badbbbe..d32bd0ab 100644
--- a/pypots/imputation/saits.py
+++ b/pypots/imputation/saits.py
@@ -239,19 +239,20 @@ def fit(self, train_X, val_X=None):
self.model.load_state_dict(self.best_model_dict)
self.model.eval() # set the model as eval status to freeze it.
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
"""
+
indices, X_intact, X, missing_mask, indicating_mask = data
inputs = {
@@ -263,6 +264,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The process of assembling the validating data is the same as assembling the training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The process of assembling the testing data is the same as assembling the training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
diff --git a/pypots/imputation/transformer.py b/pypots/imputation/transformer.py
index 8146a266..c84c30b1 100644
--- a/pypots/imputation/transformer.py
+++ b/pypots/imputation/transformer.py
@@ -330,18 +330,18 @@ def fit(self, train_X, val_X=None):
self.model.eval() # set the model as eval status to freeze it.
return self
- def assemble_input_data(self, data):
- """Assemble the input data into a dictionary.
+ def assemble_input_for_training(self, data):
+ """Assemble the given data into a dictionary for training input.
Parameters
----------
- data : list
- A list containing data fetched from Dataset by Dataload.
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
Returns
-------
- inputs : dict
- A dictionary with data assembled.
+ inputs : dict,
+ A Python dictionary containing the input data for model training.
"""
indices, X_intact, X, missing_mask, indicating_mask = data
@@ -355,6 +355,45 @@ def assemble_input_data(self, data):
return inputs
+ def assemble_input_for_validating(self, data) -> dict:
+ """Assemble the given data into a dictionary for validating input.
+
+ Notes
+ -----
+ The process of assembling the validating data is the same as assembling the training data.
+
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model validating.
+ """
+ return self.assemble_input_for_training(data)
+
+ def assemble_input_for_testing(self, data) -> dict:
+ """Assemble the given data into a dictionary for testing input.
+
+ Notes
+ -----
+ The process of assembling the testing data is the same as assembling the training data.
+
+ Parameters
+ ----------
+ data : list,
+ A list containing data fetched from Dataset by Dataloader.
+
+ Returns
+ -------
+ inputs : dict,
+ A Python dictionary containing the input data for model testing.
+ """
+ return self.assemble_input_for_training(data)
+
def impute(self, X):
X = self.check_input(self.n_steps, self.n_features, X)
self.model.eval() # set the model as eval status to freeze it.
diff --git a/requirements.txt b/requirements.txt
index e22d68a0..59de6847 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
-matplotlib
numpy >= 1.23.3
-scikit_learn >= 0.24.1
+scikit-learn >= 0.24.1
torch == 1.11.0
scipy
tensorboard
diff --git a/setup.py b/setup.py
index f0ba6587..ba9febff 100644
--- a/setup.py
+++ b/setup.py
@@ -31,9 +31,8 @@
packages=find_packages(exclude=["tests"]),
include_package_data=True,
install_requires=[
- "matplotlib",
- "numpy",
- "scikit_learn",
+ "numpy>=1.23.3",
+ "scikit-learn>=0.24.1",
"scipy",
"torch>=1.10", # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10
# "torch_sparse==0.6.13",