From c5d45944b254ee231eae93ed98edf3656fba09fa Mon Sep 17 00:00:00 2001 From: Kirill Trapeznikov Date: Tue, 28 Jun 2022 20:49:23 +0000 Subject: [PATCH] Squashed commit of the following: commit 695539472ffe006a56c8c942dd09a2b819bc9f08 Merge: 86eec0b 4a3ff27 Author: Kirill Trapeznikov Date: Tue Jun 28 20:20:23 2022 +0000 Merge branch 'master' of github.com:stresearch/gaia commit 4a3ff27119d882311c9bf3c27d81602e11bb7415 Author: ktrapeznikov Date: Tue Jun 28 16:19:57 2022 -0400 Dataset (#34) * Squashed commit of the following: commit 86eec0bcd8acdfb1ad25ca9b8fa2eccf925694f9 Merge: 2e2f458 4974acc Author: Kirill Trapeznikov Date: Tue Jun 28 20:06:52 2022 +0000 Merge branch '19-fix-mean-bias' commit 2e2f45865aee0ed3c0c3c8253ba73aff7339c14b Merge: 59be221 f6bedff Author: Kirill Trapeznikov Date: Tue Jun 28 20:04:25 2022 +0000 Merge branch 'master' of github.com:stresearch/gaia commit 4974acc399c308d00d12915d6ccca394c48ee689 Author: Kirill Trapeznikov Date: Tue Jun 28 20:01:30 2022 +0000 updates commit 98b0f426dcd0dd3d2bc0c915745643a26ccc6fc5 Author: Kirill Trapeznikov Date: Wed Jun 8 00:30:51 2022 +0000 minor fixes commit 59be22151c69d4ba1e12c640c6c2fbab7eaa0749 Merge: c160d9f e225ecd Author: Kirill Trapeznikov Date: Wed Jun 8 00:18:30 2022 +0000 Merge branch 'master' of github.com:stresearch/gaia commit c160d9f0a4c4edc22e435860ed6a5669a4c4e065 Author: Kirill Trapeznikov Date: Wed Jun 8 00:18:22 2022 +0000 adding changes to dataset * delete Co-authored-by: Kirill Trapeznikov commit 86eec0bcd8acdfb1ad25ca9b8fa2eccf925694f9 Merge: 2e2f458 4974acc Author: Kirill Trapeznikov Date: Tue Jun 28 20:06:52 2022 +0000 Merge branch '19-fix-mean-bias' commit 2e2f45865aee0ed3c0c3c8253ba73aff7339c14b Merge: 59be221 f6bedff Author: Kirill Trapeznikov Date: Tue Jun 28 20:04:25 2022 +0000 Merge branch 'master' of github.com:stresearch/gaia commit 4974acc399c308d00d12915d6ccca394c48ee689 Author: Kirill Trapeznikov Date: Tue Jun 28 20:01:30 2022 +0000 updates commit 98b0f426dcd0dd3d2bc0c915745643a26ccc6fc5 Author: Kirill Trapeznikov Date: Wed Jun 8 00:30:51 2022 +0000 minor fixes commit 59be22151c69d4ba1e12c640c6c2fbab7eaa0749 Merge: c160d9f e225ecd Author: Kirill Trapeznikov Date: Wed Jun 8 00:18:30 2022 +0000 Merge branch 'master' of github.com:stresearch/gaia commit c160d9f0a4c4edc22e435860ed6a5669a4c4e065 Author: Kirill Trapeznikov Date: Wed Jun 8 00:18:22 2022 +0000 adding changes to dataset --- create_dataset.py | 16 ++++++++++++++++ gaia/data.py | 7 +++++++ 2 files changed, 23 insertions(+) create mode 100644 create_dataset.py diff --git a/create_dataset.py b/create_dataset.py new file mode 100644 index 0000000..96dbec7 --- /dev/null +++ b/create_dataset.py @@ -0,0 +1,16 @@ +from gaia.data import NCDataConstructor + + +cam4 = "cam4-famip-30m-timestep" +spcam = "spcamclbm-nx-16-20m-timestep" +workers = 64 +cache = "cache" + +if __name__=="__main__": + # NCDataConstructor.default_data(split="train", workers =workers, prefix=cam4, train_years=3, save_location=".", cache = cache) + # NCDataConstructor.default_data(split="test", workers =workers, prefix=cam4, train_years=3, save_location=".", cache = cache) + # NCDataConstructor.default_data(split="train", workers =workers, prefix=spcam, train_years=2, save_location=".", cache = cache) + NCDataConstructor.default_data(split="test", workers =workers, prefix=spcam, train_years=2, save_location=".", cache = cache) + + + # NCDataConstructor.default_data(split="train") \ No newline at end of file diff --git a/gaia/data.py b/gaia/data.py index c53c32c..0141208 100644 --- a/gaia/data.py +++ b/gaia/data.py @@ -753,6 +753,13 @@ def load_files_parallel(self, files, num_workers=8, save_file=None): # logger.warning(f"failed {args}, {kwargs}") # return + logger.info("delete cache files if any") + + os.makedirs(self.cache, exist_ok=True) + + for f in tqdm.tqdm(glob.glob(os.path.join(self.cache,"*"))): + os.remove(f) + logger.info("downloading files") with ProcessPoolExecutor(max_workers=num_workers) as exec: