From 6a5eb960303d8d6407e3c59cacd2a1d92ecc9460 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Mon, 19 Aug 2024 14:37:56 -0400
Subject: [PATCH 01/28] utils renamed and black formatting applied

---
 examples/alexandria/find_json_files.py        |  2 +-
 examples/alexandria/train.py                  | 42 ++++++++-------
 examples/ani1_x/train.py                      | 44 +++++++++-------
 examples/csce/train_gap.py                    | 51 ++++++++++--------
 .../train_discrete_uv_spectrum.py             | 20 +++----
 .../train_smooth_uv_spectrum.py               | 20 +++----
 examples/eam/eam.py                           | 30 ++++++-----
 examples/ising_model/train_ising.py           | 41 ++++++++-------
 examples/lsms/lsms.py                         | 21 +++++---
 examples/md17/md17.py                         |  8 +--
 examples/mptrj/train.py                       | 40 ++++++++------
 .../multidataset/energy_linear_regression.py  | 10 ++--
 examples/multidataset/train.py                | 30 +++++------
 examples/multidataset_hpo/gfm.py              | 30 +++++------
 examples/ogb/train_gap.py                     | 39 +++++++-------
 .../open_catalyst_2020/download_dataset.py    |  4 +-
 examples/open_catalyst_2020/train.py          | 27 +++++-----
 examples/open_catalyst_2020/uncompress.py     |  2 +-
 .../utils/atoms_to_graphs.py                  |  5 +-
 examples/open_catalyst_2022/train.py          | 41 ++++++++-------
 examples/qm7x/train.py                        | 31 ++++++-----
 examples/qm9/qm9.py                           |  6 +--
 examples/qm9_hpo/qm9.py                       |  6 +--
 examples/qm9_hpo/qm9_deephyper.py             |  4 +-
 examples/qm9_hpo/qm9_optuna.py                |  4 +-
 hydragnn/__init__.py                          |  1 -
 hydragnn/models/Base.py                       |  2 +-
 hydragnn/preprocess/__init__.py               |  6 +--
 hydragnn/preprocess/cfg_raw_dataset_loader.py |  1 -
 ...py => graph_samples_checks_and_updates.py} | 52 ++-----------------
 hydragnn/preprocess/load_data.py              | 17 +++---
 hydragnn/preprocess/raw_dataset_loader.py     | 11 ++--
 .../preprocess/serialized_dataset_loader.py   | 16 +++---
 hydragnn/preprocess/stratified_sampling.py    | 48 +++++++++++++++++
 hydragnn/run_prediction.py                    |  2 +-
 hydragnn/run_training.py                      |  9 ++--
 hydragnn/utils/__init__.py                    | 40 --------------
 hydragnn/utils/datasets/__init__.py           | 19 +++++++
 .../{ => datasets}/abstractbasedataset.py     |  6 +--
 .../{ => datasets}/abstractrawdataset.py      | 25 +++------
 hydragnn/utils/{ => datasets}/adiosdataset.py | 13 +++--
 hydragnn/utils/{ => datasets}/cfgdataset.py   |  2 +-
 .../datasets}/compositional_data_splitting.py |  8 +--
 hydragnn/utils/{ => datasets}/distdataset.py  | 10 ++--
 hydragnn/utils/{ => datasets}/lsmsdataset.py  |  2 +-
 .../utils/{ => datasets}/pickledataset.py     |  9 ++--
 .../utils/{ => datasets}/serializeddataset.py | 12 ++---
 hydragnn/utils/{ => datasets}/xyzdataset.py   |  3 +-
 .../descriptors_and_embeddings/__init__.py    |  6 +++
 .../atomicdescriptors.py                      |  0
 .../smiles_utils.py                           |  0
 hydragnn/utils/distributed/__init__.py        | 16 ++++++
 .../utils/{ => distributed}/distributed.py    |  3 +-
 hydragnn/utils/hpo/__init__.py                |  7 +++
 hydragnn/utils/{ => hpo}/deephyper.py         |  0
 .../utils/input_config_parsing/__init__.py    |  6 +++
 .../config_utils.py                           | 13 +++--
 hydragnn/utils/model/__init__.py              | 11 ++++
 hydragnn/utils/{ => model}/model.py           |  5 +-
 hydragnn/utils/optimizer/__init__.py          |  1 +
 hydragnn/utils/{ => optimizer}/optimizer.py   |  2 +-
 hydragnn/utils/print/__init__.py              |  1 +
 hydragnn/utils/{ => print}/print_utils.py     |  4 +-
 .../utils/profiling_and_tracing/__init__.py   |  3 ++
 .../gptl4py_dummy.py                          |  1 -
 .../{ => profiling_and_tracing}/profile.py    |  2 +-
 .../{ => profiling_and_tracing}/time_utils.py |  4 +-
 .../{ => profiling_and_tracing}/tracer.py     |  4 --
 tests/test_datasetclass_inheritance.py        |  9 ++--
 tests/test_deepspeed.py                       |  4 --
 tests/test_graphs.py                          |  2 +-
 tests/test_model_loadpred.py                  |  4 +-
 tests/test_periodic_boundary_conditions.py    |  5 +-
 tests/test_rotational_invariance.py           |  6 +--
 74 files changed, 522 insertions(+), 469 deletions(-)
 rename hydragnn/preprocess/{utils.py => graph_samples_checks_and_updates.py} (85%)
 create mode 100644 hydragnn/preprocess/stratified_sampling.py
 delete mode 100644 hydragnn/utils/__init__.py
 create mode 100644 hydragnn/utils/datasets/__init__.py
 rename hydragnn/utils/{ => datasets}/abstractbasedataset.py (87%)
 rename hydragnn/utils/{ => datasets}/abstractrawdataset.py (95%)
 rename hydragnn/utils/{ => datasets}/adiosdataset.py (98%)
 rename hydragnn/utils/{ => datasets}/cfgdataset.py (97%)
 rename hydragnn/{preprocess => utils/datasets}/compositional_data_splitting.py (94%)
 rename hydragnn/utils/{ => datasets}/distdataset.py (95%)
 rename hydragnn/utils/{ => datasets}/lsmsdataset.py (97%)
 rename hydragnn/utils/{ => datasets}/pickledataset.py (95%)
 rename hydragnn/utils/{ => datasets}/serializeddataset.py (86%)
 rename hydragnn/utils/{ => datasets}/xyzdataset.py (95%)
 create mode 100644 hydragnn/utils/descriptors_and_embeddings/__init__.py
 rename hydragnn/utils/{ => descriptors_and_embeddings}/atomicdescriptors.py (100%)
 rename hydragnn/utils/{ => descriptors_and_embeddings}/smiles_utils.py (100%)
 create mode 100644 hydragnn/utils/distributed/__init__.py
 rename hydragnn/utils/{ => distributed}/distributed.py (99%)
 create mode 100644 hydragnn/utils/hpo/__init__.py
 rename hydragnn/utils/{ => hpo}/deephyper.py (100%)
 create mode 100644 hydragnn/utils/input_config_parsing/__init__.py
 rename hydragnn/utils/{ => input_config_parsing}/config_utils.py (97%)
 create mode 100644 hydragnn/utils/model/__init__.py
 rename hydragnn/utils/{ => model}/model.py (98%)
 create mode 100644 hydragnn/utils/optimizer/__init__.py
 rename hydragnn/utils/{ => optimizer}/optimizer.py (98%)
 create mode 100644 hydragnn/utils/print/__init__.py
 rename hydragnn/utils/{ => print}/print_utils.py (95%)
 create mode 100644 hydragnn/utils/profiling_and_tracing/__init__.py
 rename hydragnn/utils/{ => profiling_and_tracing}/gptl4py_dummy.py (97%)
 rename hydragnn/utils/{ => profiling_and_tracing}/profile.py (96%)
 rename hydragnn/utils/{ => profiling_and_tracing}/time_utils.py (97%)
 rename hydragnn/utils/{ => profiling_and_tracing}/tracer.py (98%)

diff --git a/examples/alexandria/find_json_files.py b/examples/alexandria/find_json_files.py
index 0801efb5e..acbaee0ef 100644
--- a/examples/alexandria/find_json_files.py
+++ b/examples/alexandria/find_json_files.py
@@ -24,7 +24,7 @@ def find_json_files(url):
 
 url_root = "https://alexandria.icams.rub.de/data"  # Replace with the actual URL
 
-dirpath = "dataset/compressed_data"
+dirpath = "datasets/compressed_data"
 
 if os.path.exists(dirpath) and os.path.isdir(dirpath):
     shutil.rmtree(dirpath)
diff --git a/examples/alexandria/train.py b/examples/alexandria/train.py
index 15624c03d..82969111d 100644
--- a/examples/alexandria/train.py
+++ b/examples/alexandria/train.py
@@ -15,17 +15,23 @@
 from torch_geometric.transforms import Distance, Spherical, LocalCartesian
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
-from hydragnn.preprocess.utils import RadiusGraph, RadiusGraphPBC
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    RadiusGraph,
+    RadiusGraphPBC,
+)
 from hydragnn.preprocess.load_data import split_dataset
-import hydragnn.utils.tracer as tr
-from hydragnn.utils.print_utils import iterate_tqdm, log
+import hydragnn.utils.profiling_and_tracing.tracer as tr
+from hydragnn.utils.print.print_utils import iterate_tqdm, log
 
 from generate_dictionaries_pure_elements import (
     generate_dictionary_bulk_energies,
@@ -38,7 +44,7 @@
     pass
 
 import subprocess
-from hydragnn.utils import nsplit
+from hydragnn.utils.distributed import nsplit
 
 
 def info(*args, logtype="info", sep=" "):
@@ -244,7 +250,7 @@ def get_magmoms_array_from_structure(structure):
 
     def process_file_content(self, filepath):
         """
-        Download a file from a dataset of the Alexandria database with the respective index
+        Download a file from a datasets of the Alexandria database with the respective index
        and write it to the LMDB file with the respective index.
 
        Parameters
@@ -311,7 +317,7 @@ def get(self, idx):
         type=bool,
         default=True,
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -321,14 +327,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -341,7 +347,7 @@ def get(self, idx):
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset")
+    datadir = os.path.join(dirpwd, "datasets")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, args.inputfile)
     ##################################################################################################################
@@ -403,7 +409,7 @@ def get(self, idx):
         ## adios
         if args.format == "adios":
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % modelname
+                os.path.dirname(__file__), "./datasets/%s.bp" % modelname
             )
             adwriter = AdiosWriter(fname, comm)
             adwriter.add("trainset", trainset)
@@ -417,7 +423,7 @@ def get(self, idx):
         ## pickle
         elif args.format == "pickle":
             basedir = os.path.join(
-                os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+                os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
             )
             attrs = dict()
             attrs["pna_deg"] = deg
@@ -462,14 +468,14 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
diff --git a/examples/ani1_x/train.py b/examples/ani1_x/train.py
index 89831e3b5..c4c5a25ec 100644
--- a/examples/ani1_x/train.py
+++ b/examples/ani1_x/train.py
@@ -1,4 +1,4 @@
-import os, re, json
+import os, json
 import logging
 import sys
 from mpi4py import MPI
@@ -7,7 +7,6 @@
 import numpy as np
 import random
 
-
 import torch
 
 # FIX random seed
@@ -18,26 +17,31 @@
 from torch_geometric.transforms import Distance, Spherical, LocalCartesian
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
-from hydragnn.preprocess.utils import RadiusGraph, RadiusGraphPBC
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    RadiusGraph,
+    RadiusGraphPBC,
+)
 from hydragnn.preprocess.load_data import split_dataset
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import iterate_tqdm, log
+from hydragnn.utils.print.print_utils import log
 
 try:
     from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
 except ImportError:
     pass
 
-import subprocess
-from hydragnn.utils import nsplit
+from hydragnn.utils.distributed import nsplit
 
 import h5py
@@ -193,7 +197,7 @@ def get(self, idx):
         type=bool,
         default=True,
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -203,14 +207,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -223,7 +227,7 @@ def get(self, idx):
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset")
+    datadir = os.path.join(dirpwd, "datasets")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, args.inputfile)
     ##################################################################################################################
@@ -285,7 +289,7 @@ def get(self, idx):
         ## adios
         if args.format == "adios":
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % modelname
+                os.path.dirname(__file__), "./datasets/%s.bp" % modelname
             )
             adwriter = AdiosWriter(fname, comm)
             adwriter.add("trainset", trainset)
@@ -299,7 +303,7 @@ def get(self, idx):
         ## pickle
         elif args.format == "pickle":
             basedir = os.path.join(
-                os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+                os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
             )
             attrs = dict()
             attrs["pna_deg"] = deg
@@ -344,14 +348,14 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
diff --git a/examples/csce/train_gap.py b/examples/csce/train_gap.py
index d802ad50d..37c89bb0b 100644
--- a/examples/csce/train_gap.py
+++ b/examples/csce/train_gap.py
@@ -13,17 +13,20 @@
 import argparse
 
 import hydragnn
-from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log
-from hydragnn.utils.time_utils import Timer
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.utils.smiles_utils import (
+from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm, log
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.utils.descriptors_and_embeddings.smiles_utils import (
     get_node_attribute_name,
     generate_graphdata_from_smilestr,
 )
-from hydragnn.preprocess.utils import gather_deg
-from hydragnn.utils import nsplit
-import hydragnn.utils.tracer as tr
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
+from hydragnn.utils.distributed import nsplit
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
 import numpy as np
@@ -161,42 +164,42 @@ def __getitem__(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
     )
     group.add_argument(
-        "--csv", help="CSV dataset", action="store_const", dest="format", const="csv"
+        "--csv", help="CSV datasets", action="store_const", dest="format", const="csv"
     )
     parser.set_defaults(format="adios")
     group1 = parser.add_mutually_exclusive_group()
     group1.add_argument(
         "--shmem",
-        help="shmem dataset",
+        help="shmem datasets",
         action="store_const",
-        dest="dataset",
+        dest="datasets",
         const="shmem",
     )
     group1.add_argument(
         "--ddstore",
-        help="ddstore dataset",
+        help="ddstore datasets",
         action="store_const",
-        dest="dataset",
+        dest="datasets",
         const="ddstore",
     )
     group1.add_argument(
         "--simple",
-        help="no special dataset",
+        help="no special datasets",
         action="store_const",
-        dest="dataset",
+        dest="datasets",
         const="simple",
     )
     parser.set_defaults(dataset="simple")
@@ -206,7 +209,7 @@ def __getitem__(self, idx):
     graph_feature_names = ["GAP"]
     graph_feature_dim = [1]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datafile = os.path.join(dirpwd, "dataset/csce_gap_synth.csv")
+    datafile = os.path.join(dirpwd, "datasets/csce_gap_synth.csv")
     ##################################################################################################################
     inputfilesubstr = args.inputfilesubstr
     input_filename = os.path.join(dirpwd, "csce_" + inputfilesubstr + ".json")
@@ -293,7 +296,7 @@ def __getitem__(self, idx):
         config["pna_deg"] = deg
 
         ## pickle
-        basedir = os.path.join(os.path.dirname(__file__), "dataset", "pickle")
+        basedir = os.path.join(os.path.dirname(__file__), "datasets", "pickle")
         attrs = dict()
         attrs["pna_deg"] = deg
         SimplePickleWriter(
@@ -316,7 +319,7 @@ def __getitem__(self, idx):
             use_subdir=True,
         )
 
-        fname = os.path.join(os.path.dirname(__file__), "dataset", "csce_gap.bp")
+        fname = os.path.join(os.path.dirname(__file__), "datasets", "csce_gap.bp")
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
         adwriter.add("valset", valset)
@@ -344,20 +347,22 @@ def __getitem__(self, idx):
             opt = {"preload": False, "shmem": shmem, "ddstore": ddstore}
 
         fname = fname = os.path.join(
-            os.path.dirname(__file__), "dataset", "csce_gap.bp"
+            os.path.dirname(__file__), "datasets", "csce_gap.bp"
         )
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm)
         testset = AdiosDataset(fname, "testset", comm)
         comm.Barrier()
     elif args.format == "csv":
-        fname = os.path.join(os.path.dirname(__file__), "dataset", "csce_gap_synth.csv")
+        fname = os.path.join(
+            os.path.dirname(__file__), "datasets", "csce_gap_synth.csv"
+        )
         fact = CSCEDatasetFactory(fname, args.sampling, var_config=var_config)
         trainset = CSCEDataset(fact, "trainset")
         valset = CSCEDataset(fact, "valset")
         testset = CSCEDataset(fact, "testset")
     elif args.format == "pickle":
-        basedir = os.path.join(os.path.dirname(__file__), "dataset", "pickle")
+        basedir = os.path.join(os.path.dirname(__file__), "datasets", "pickle")
         trainset = SimplePickleDataset(basedir, "trainset")
         valset = SimplePickleDataset(basedir, "valset")
         testset = SimplePickleDataset(basedir, "testset")
diff --git a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py
index b81fd5b9b..2540893ae 100644
--- a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py
+++ b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py
@@ -29,7 +29,7 @@
 from hydragnn.preprocess.load_data import split_dataset
 from hydragnn.utils.distdataset import DistDataset
 from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 
 import numpy as np
@@ -75,7 +75,7 @@ def dftb_to_graph(moldir, dftb_node_types, var_config):
 
 
 class DFTBDataset(AbstractBaseDataset):
-    """DFTBDataset dataset class"""
+    """DFTBDataset datasets class"""
 
    def __init__(self, dirpath, dftb_node_types, var_config, dist=False, sampling=None):
        super().__init__()
@@ -138,7 +138,7 @@ def get(self, idx):
         help="preprocess only (no training)",
     )
     parser.add_argument("--mae", action="store_true", help="do mae calculation")
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -148,14 +148,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -166,7 +166,7 @@ def get(self, idx):
     graph_feature_names = ["frequencies", "intensities"]
     graph_feature_dim = [50, 50]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datafile = os.path.join(dirpwd, "dataset/dftb_aisd_electronic_excitation_spectrum")
+    datafile = os.path.join(dirpwd, "datasets/dftb_aisd_electronic_excitation_spectrum")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, "dftb_discrete_uv_spectrum.json")
     ##################################################################################################################
@@ -227,7 +227,7 @@ def get(self, idx):
         config["pna_deg"] = deg
 
         ## adios
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
         adwriter.add("valset", valset)
@@ -239,7 +239,7 @@ def get(self, idx):
 
         ## pickle
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         attrs = dict()
         attrs["pna_deg"] = deg
@@ -283,14 +283,14 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(basedir, "trainset")
         valset = SimplePickleDataset(basedir, "valset")
diff --git a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py
index 5af612c4a..fe721822e 100644
--- a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py
+++ b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py
@@ -31,7 +31,7 @@
 from hydragnn.preprocess.load_data import split_dataset
 from hydragnn.utils.distdataset import DistDataset
 from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 
 import numpy as np
@@ -75,7 +75,7 @@ def dftb_to_graph(moldir, dftb_node_types, var_config):
 
 
 class DFTBDataset(AbstractBaseDataset):
-    """DFTBDataset dataset class"""
+    """DFTBDataset datasets class"""
 
    def __init__(self, dirpath, dftb_node_types, var_config, dist=False, sampling=None):
        super().__init__()
@@ -138,7 +138,7 @@ def get(self, idx):
         help="preprocess only (no training)",
     )
     parser.add_argument("--mae", action="store_true", help="do mae calculation")
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -148,14 +148,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -166,7 +166,7 @@ def get(self, idx):
     graph_feature_names = ["spectrum"]
     graph_feature_dim = [37500]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datafile = os.path.join(dirpwd, "dataset/dftb_aisd_electronic_excitation_spectrum")
+    datafile = os.path.join(dirpwd, "datasets/dftb_aisd_electronic_excitation_spectrum")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, "dftb_smooth_uv_spectrum.json")
     ##################################################################################################################
@@ -227,7 +227,7 @@ def get(self, idx):
         config["pna_deg"] = deg
 
         ## adios
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
         adwriter.add("valset", valset)
@@ -239,7 +239,7 @@ def get(self, idx):
 
         ## pickle
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         attrs = dict()
         attrs["pna_deg"] = deg
@@ -283,14 +283,14 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(basedir, "trainset")
         valset = SimplePickleDataset(basedir, "valset")
diff --git a/examples/eam/eam.py b/examples/eam/eam.py
index 64a8b804e..7c6340c0c 100644
--- a/examples/eam/eam.py
+++ b/examples/eam/eam.py
@@ -5,13 +5,15 @@
 import argparse
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
-from hydragnn.utils.config_utils import get_log_name_config
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+from hydragnn.utils.input_config_parsing.config_utils import get_log_name_config
 from hydragnn.utils.model import print_model
-from hydragnn.utils.cfgdataset import CFGDataset
-from hydragnn.utils.serializeddataset import SerializedWriter, SerializedDataset
+from hydragnn.utils.datasets.cfgdataset import CFGDataset
+from hydragnn.utils.datasets.serializeddataset import (
+    SerializedWriter,
+    SerializedDataset,
+)
 from hydragnn.preprocess.load_data import split_dataset
-from hydragnn.utils.print_utils import log
 
 try:
     from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
@@ -44,14 +46,14 @@ def info(*args, logtype="info", sep=" "):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -77,9 +79,9 @@ def info(*args, logtype="info", sep=" "):
         datefmt="%H:%M:%S",
     )
 
-    os.environ["SERIALIZED_DATA_PATH"] = dirpwd + "/dataset"
+    os.environ["SERIALIZED_DATA_PATH"] = dirpwd + "/datasets"
     datasetname = config["Dataset"]["name"]
-    fname_adios = dirpwd + "/dataset/%s.bp" % (datasetname)
+    fname_adios = dirpwd + "/datasets/%s.bp" % (datasetname)
     config["Dataset"]["name"] = "%s_%d" % (datasetname, rank)
     if not args.loadexistingsplit:
         total = CFGDataset(config)
@@ -93,7 +95,7 @@ def info(*args, logtype="info", sep=" "):
 
         if args.format == "adios":
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % datasetname
+                os.path.dirname(__file__), "./datasets/%s.bp" % datasetname
             )
             adwriter = AdiosWriter(fname, MPI.COMM_SELF)
             adwriter.add("trainset", trainset)
@@ -104,7 +106,7 @@ def info(*args, logtype="info", sep=" "):
             adwriter.save()
         elif args.format == "pickle":
             basedir = os.path.join(
-                os.path.dirname(__file__), "dataset", "serialized_dataset"
+                os.path.dirname(__file__), "datasets", "serialized_dataset"
             )
             SerializedWriter(
                 trainset,
@@ -138,14 +140,16 @@ def info(*args, logtype="info", sep=" "):
             "preload": True,
             "shmem": False,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % datasetname)
+        fname = os.path.join(
+            os.path.dirname(__file__), "./datasets/%s.bp" % datasetname
+        )
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "serialized_dataset"
+            os.path.dirname(__file__), "datasets", "serialized_dataset"
         )
         trainset = SerializedDataset(basedir, datasetname, "trainset")
         valset = SerializedDataset(basedir, datasetname, "valset")
diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py
index 892b08824..de654c17f 100644
--- a/examples/ising_model/train_ising.py
+++ b/examples/ising_model/train_ising.py
@@ -11,15 +11,18 @@
 import argparse
 
 import hydragnn
-from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log
-from hydragnn.utils.time_utils import Timer
-from hydragnn.utils.config_utils import get_log_name_config
+from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm, log
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+from hydragnn.utils.input_config_parsing.config_utils import get_log_name_config
 from hydragnn.preprocess.load_data import split_dataset
 from hydragnn.utils.model import print_model
-from hydragnn.utils.lsmsdataset import LSMSDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.utils.datasets.lsmsdataset import LSMSDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 
 import numpy as np
@@ -45,8 +48,8 @@
 
 from create_configurations import E_dimensionless
 
-import hydragnn.utils.tracer as tr
-from hydragnn.utils import nsplit
+import hydragnn.utils.profiling_and_tracing.tracer as tr
+from hydragnn.utils.distributed import nsplit
 
 
 def write_to_file(total_energy, atomic_features, count_config, dir, prefix):
@@ -90,7 +93,7 @@ def create_dataset_mpi(
         os.makedirs(subdir, exist_ok=True)
 
     for num_downs in iterate_tqdm(
-        range(rx.start, rx.stop), verbosity_level=2, desc="Creating dataset"
+        range(rx.start, rx.stop), verbosity_level=2, desc="Creating datasets"
     ):
         prefix = "output_%d_" % num_downs
         subdir = os.path.join(dir, str(num_downs))
@@ -159,21 +162,21 @@ def info(*args, logtype="info", sep=" "):
     )
     parser.add_argument("--seed", type=int, help="seed", default=43)
     parser.add_argument("--sampling", type=float, help="sampling ratio", default=None)
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--log", help="log name")
     parser.add_argument("--everyone", action="store_true", help="gptimer")
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -223,12 +226,12 @@ def info(*args, logtype="info", sep=" "):
     """
     Parallel ising data generation step:
     1. Generate ising data (*.txt) in parallel (create_dataset_mpi)
-    2. Read raw dataset in parallel (*.txt) (RawDataset)
+    2. Read raw datasets in parallel (*.txt) (RawDataset)
     3. Split into a train, valid, and test set (split_dataset)
     4. Save as Adios file in parallel
     """
     sys.setrecursionlimit(1000000)
-    dir = os.path.join(os.path.dirname(__file__), "./dataset/%s" % modelname)
+    dir = os.path.join(os.path.dirname(__file__), "./datasets/%s" % modelname)
     if rank == 0:
         if os.path.exists(dir):
             shutil.rmtree(dir)
@@ -267,7 +270,7 @@ def info(*args, logtype="info", sep=" "):
         config["pna_deg"] = deg
 
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         attrs = dict()
         attrs["minmax_node_feature"] = total.minmax_node_feature
@@ -293,7 +296,7 @@ def info(*args, logtype="info", sep=" "):
             use_subdir=True,
         )
 
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
         adwriter.add("valset", valset)
@@ -318,14 +321,14 @@ def info(*args, logtype="info", sep=" "):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(basedir, "trainset")
         valset = SimplePickleDataset(basedir, "valset")
diff --git a/examples/lsms/lsms.py b/examples/lsms/lsms.py
index 1c97ba33f..77ef10e92 100644
--- a/examples/lsms/lsms.py
+++ b/examples/lsms/lsms.py
@@ -5,11 +5,14 @@
 import argparse
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
-from hydragnn.utils.config_utils import get_log_name_config
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+from hydragnn.utils.input_config_parsing.config_utils import get_log_name_config
 from hydragnn.utils.model import print_model
-from hydragnn.utils.lsmsdataset import LSMSDataset
-from hydragnn.utils.serializeddataset import SerializedWriter, SerializedDataset
+from hydragnn.utils.datasets.lsmsdataset import LSMSDataset
+from hydragnn.utils.datasets.serializeddataset import (
+    SerializedWriter,
+    SerializedDataset,
+)
 from hydragnn.preprocess.load_data import split_dataset
 
 try:
@@ -45,14 +48,14 @@ def info(*args, logtype="info", sep=" "):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -95,7 +98,7 @@ def info(*args, logtype="info", sep=" "):
 
         if args.format == "adios":
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % datasetname
+                os.path.dirname(__file__), "./datasets/%s.bp" % datasetname
             )
             adwriter = AdiosWriter(fname, MPI.COMM_SELF)
             adwriter.add("trainset", trainset)
@@ -140,7 +143,9 @@ def info(*args, logtype="info", sep=" "):
             "preload": True,
             "shmem": False,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % datasetname)
+        fname = os.path.join(
+            os.path.dirname(__file__), "./datasets/%s.bp" % datasetname
+        )
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
diff --git a/examples/md17/md17.py b/examples/md17/md17.py
index 7e71b872e..9ad829db4 100644
--- a/examples/md17/md17.py
+++ b/examples/md17/md17.py
@@ -54,17 +54,17 @@ def md17_pre_filter(data):
 # Enable print to log file.
 hydragnn.utils.setup_log(log_name)
 
-# Use built-in torch_geometric dataset.
+# Use built-in torch_geometric datasets.
 # Filter function above used to run quick example.
 # NOTE: data is moved to the device in the pre-transform.
 # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed.
 compute_edges = hydragnn.preprocess.get_radius_graph_config(arch_config)
 
-# Fix for MD17 dataset
+# Fix for MD17 datasets
 torch_geometric.datasets.MD17.file_names["uracil"] = "md17_uracil.npz"
 
 dataset = torch_geometric.datasets.MD17(
-    root="dataset/md17",
+    root="datasets/md17",
     name="uracil",
     pre_transform=md17_pre_transform,
     pre_filter=md17_pre_filter,
@@ -90,7 +90,7 @@ def md17_pre_filter(data):
     optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001
 )
 
-# Run training with the given model and qm9 dataset.
+# Run training with the given model and qm9 datasets.
 writer = hydragnn.utils.get_summary_writer(log_name)
 hydragnn.utils.save_config(config, log_name)
diff --git a/examples/mptrj/train.py b/examples/mptrj/train.py
index 36f9ba821..190501e59 100644
--- a/examples/mptrj/train.py
+++ b/examples/mptrj/train.py
@@ -17,18 +17,24 @@
 from torch_geometric.transforms import Distance, Spherical, LocalCartesian
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
-from hydragnn.preprocess.utils import RadiusGraph, RadiusGraphPBC
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    RadiusGraph,
+    RadiusGraphPBC,
+)
 from hydragnn.preprocess.load_data import split_dataset
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import iterate_tqdm, log
+from hydragnn.utils.print.print_utils import iterate_tqdm, log
 
 from jarvis.db.jsonutils import loadjson, dumpjson
 from pymatgen.core.structure import Structure
@@ -43,7 +49,7 @@
     pass
 
 import subprocess
-from hydragnn.utils import nsplit
+from hydragnn.utils.distributed import nsplit
 
 
 def info(*args, logtype="info", sep=" "):
@@ -202,7 +208,7 @@ def get(self, idx):
         type=bool,
         default=True,
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -217,14 +223,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -237,7 +243,7 @@ def get(self, idx):
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset")
+    datadir = os.path.join(dirpwd, "datasets")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, args.inputfile)
     ##################################################################################################################
@@ -300,7 +306,7 @@ def get(self, idx):
         ## adios
         if args.format == "adios":
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % modelname
+                os.path.dirname(__file__), "./datasets/%s.bp" % modelname
             )
             adwriter = AdiosWriter(fname, comm)
             adwriter.add("trainset", trainset)
@@ -314,7 +320,7 @@ def get(self, idx):
         ## pickle
         elif args.format == "pickle":
             basedir = os.path.join(
-                os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+                os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
             )
             attrs = dict()
             attrs["pna_deg"] = deg
@@ -359,14 +365,14 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
diff --git a/examples/multidataset/energy_linear_regression.py b/examples/multidataset/energy_linear_regression.py
index e65b5e993..73a15ed3c 100644
--- a/examples/multidataset/energy_linear_regression.py
+++ b/examples/multidataset/energy_linear_regression.py
@@ -14,13 +14,13 @@
 
 
 def subset(i):
-    # sz = len(dataset)
+    # sz = len(datasets)
     # chunk = sz // C.procs
     # left = sz % C.procs
     # a = i*chunk + min(i, left)
     # b = (i+1)*chunk + min(i+1, left)
     # print(f"Rank {i}/{C.procs} converting subset [{a},{b})")
-    # return np.array([np.array(x) for x in dataset[a:b]["image"]])
+    # return np.array([np.array(x) for x in datasets[a:b]["image"]])
     return np.random.random((100, 4))
@@ -101,7 +101,7 @@ def solve_least_squares_svd(A, b):
     comm_rank = comm.Get_rank()
     comm_size = comm.Get_size()
 
-    fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % args.modelname)
+    fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % args.modelname)
     print("fname:", fname)
     trainset = AdiosDataset(
         fname,
@@ -123,7 +123,7 @@ def solve_least_squares_svd(A, b):
     )
     pna_deg = trainset.pna_deg
 
-    ## Iterate over local dataset
+    ## Iterate over local datasets
     energy_list = list()
     feature_list = list()
     for dataset in [trainset, valset, testset]:
@@ -205,7 +205,7 @@ def solve_least_squares_svd(A, b):
 
     ## Writing
     fname = os.path.join(
-        os.path.dirname(__file__), "./dataset/%s-v2.bp" % args.modelname
+        os.path.dirname(__file__), "./datasets/%s-v2.bp" % args.modelname
     )
     if comm_rank == 0:
         print("Saving:", fname)
diff --git a/examples/multidataset/train.py b/examples/multidataset/train.py
index 370a58ddf..525bd84be 100644
--- a/examples/multidataset/train.py
+++ b/examples/multidataset/train.py
@@ -13,15 +13,15 @@
 import numpy as np
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import SimplePickleDataset
 
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import log, log0
-from hydragnn.utils import nsplit
+from hydragnn.utils.print.print_utils import log, log0
+from hydragnn.utils.distributed import nsplit
 
 try:
     from hydragnn.utils.adiosdataset import AdiosDataset
@@ -46,7 +46,7 @@ def info(*args, logtype="info", sep=" "):
     parser.add_argument(
         "--inputfile", help="input file", type=str, default="gfm_multitasking.json"
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -73,21 +73,21 @@ def info(*args, logtype="info", sep=" "):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
     )
     group.add_argument(
         "--multi",
-        help="Multi dataset",
+        help="Multi datasets",
         action="store_const",
         dest="format",
         const="multi",
@@ -100,7 +100,7 @@ def info(*args, logtype="info", sep=" "):
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset")
+    datadir = os.path.join(dirpwd, "datasets")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, args.inputfile)
     ##################################################################################################################
@@ -156,14 +156,14 @@ def info(*args, logtype="info", sep=" "):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
@@ -194,7 +194,7 @@ def info(*args, logtype="info", sep=" "):
         pna_deg_list = list()
         for model in modellist:
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % model
+                os.path.dirname(__file__), "./datasets/%s.bp" % model
             )
             with ad2.open(fname, "r", MPI.COMM_SELF) as f:
                 f.__next__()
@@ -259,7 +259,7 @@ def info(*args, logtype="info", sep=" "):
             "pos",
             "y",
         ]
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % mymodel)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % mymodel)
         trainset = AdiosDataset(
             fname,
             "trainset",
diff --git a/examples/multidataset_hpo/gfm.py b/examples/multidataset_hpo/gfm.py
index 4228fb39f..d2b99fdb1 100644
--- a/examples/multidataset_hpo/gfm.py
+++ b/examples/multidataset_hpo/gfm.py
@@ -8,15 +8,15 @@
 import numpy as np
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import SimplePickleDataset
 
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import log
-from hydragnn.utils import nsplit
+from hydragnn.utils.print.print_utils import log
+from hydragnn.utils.distributed import nsplit
 
 try:
     from hydragnn.utils.adiosdataset import AdiosDataset
@@ -53,7 +53,7 @@ def main():
     parser.add_argument("--num_headlayers", type=int, help="num_headlayers", default=2)
     parser.add_argument("--dim_headlayers", type=int, help="dim_headlayers", default=10)
 
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name", default="gfm_test")
@@ -74,21 +74,21 @@ def main():
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
     )
     group.add_argument(
         "--multi",
-        help="Multi dataset",
+        help="Multi datasets",
         action="store_const",
         dest="format",
         const="multi",
@@ -102,7 +102,7 @@ def main():
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset")
+    datadir = os.path.join(dirpwd, "datasets")
     ##################################################################################################################
     input_filename = os.path.join(dirpwd, args.inputfile)
     ##################################################################################################################
@@ -185,14 +185,14 @@ def main():
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
@@ -223,7 +223,7 @@ def main():
         pna_deg_list = list()
         for model in modellist:
             fname = os.path.join(
-                os.path.dirname(__file__), "./dataset/%s.bp" % model
+                os.path.dirname(__file__), "./datasets/%s.bp" % model
             )
             with ad2.open(fname, "r", MPI.COMM_SELF) as f:
                 f.__next__()
@@ -288,7 +288,7 @@ def main():
             "pos",
             "y",
         ]
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % mymodel)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % mymodel)
         trainset = AdiosDataset(
             fname,
             "trainset",
diff --git a/examples/ogb/train_gap.py b/examples/ogb/train_gap.py
index 52174b26f..586927427 100644
--- a/examples/ogb/train_gap.py
+++ b/examples/ogb/train_gap.py
@@ -8,25 +8,26 @@
 import sys
 from tqdm import tqdm
 from mpi4py import MPI
-from itertools import chain
 import argparse
-import time
 import math
 
 import hydragnn
 from hydragnn.preprocess.load_data import split_dataset
-from hydragnn.utils.print_utils import print_distributed, iterate_tqdm
-from hydragnn.utils.time_utils import Timer
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.utils.print.print_utils import print_distributed
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 from hydragnn.utils.model import print_model
-from hydragnn.utils.smiles_utils import (
+from hydragnn.utils.descriptors_and_embeddings.smiles_utils import (
     get_node_attribute_name,
     generate_graphdata_from_smilestr,
 )
-from hydragnn.utils.config_utils import parse_deepspeed_config
+from hydragnn.utils.input_config_parsing.config_utils import parse_deepspeed_config
 from hydragnn.utils.distributed import get_deepspeed_init_args
-from hydragnn.utils import nsplit
+from hydragnn.utils.distributed import nsplit
 
 import numpy as np
@@ -136,7 +137,7 @@ def smiles_to_graph(datadir, files_list):
 
 
 class OGBDataset(AbstractBaseDataset):
-    """OGBDataset dataset class"""
+    """OGBDataset datasets class"""
 
     def __init__(self, dirpath, var_config, dist=False):
         super().__init__()
@@ -264,20 +265,20 @@ def __getitem__(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
     )
     group.add_argument(
-        "--csv", help="CSV dataset", action="store_const", dest="format", const="csv"
+        "--csv", help="CSV datasets", action="store_const", dest="format", const="csv"
     )
     parser.add_argument(
         "--use_deepspeed",
@@ -291,7 +292,7 @@ def __getitem__(self, idx):
     graph_feature_names = ["GAP"]
     graph_feature_dim = [1]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, "dataset/")
+    datadir = os.path.join(dirpwd, "datasets/")
     ##################################################################################################################
     inputfilesubstr = args.inputfilesubstr
     input_filename = os.path.join(dirpwd, "ogb_" + inputfilesubstr + ".json")
@@ -359,7 +360,7 @@ def __getitem__(self, idx):
 
         ## pickle
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         attrs = dict()
         attrs["pna_deg"] = deg
@@ -390,7 +391,7 @@ def __getitem__(self, idx):
     )
 
     if args.format == "adios":
-        fname = os.path.join(os.path.dirname(__file__), "dataset", "ogb_gap.bp")
+        fname = os.path.join(os.path.dirname(__file__), "datasets", "ogb_gap.bp")
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
         adwriter.add("valset", valset)
@@ -406,12 +407,12 @@ def __getitem__(self, idx):
         opt = {"preload": True, "shmem": False}
         if args.shmem:
             opt = {"preload": False, "shmem": True}
-        fname = os.path.join(os.path.dirname(__file__), "dataset", "ogb_gap.bp")
+        fname = os.path.join(os.path.dirname(__file__), "datasets", "ogb_gap.bp")
         trainset = AdiosDataset(fname, "trainset", comm, **opt)
         valset = AdiosDataset(fname, "valset", comm, **opt)
         testset = AdiosDataset(fname, "testset", comm, **opt)
     elif args.format == "csv":
-        fname = os.path.join(os.path.dirname(__file__), "dataset", "pcqm4m_gap.csv")
+        fname = os.path.join(os.path.dirname(__file__), "datasets", "pcqm4m_gap.csv")
         fact = OGBRawDatasetFactory(
             fname, var_config=var_config, sampling=args.sampling
         )
@@ -421,7 +422,7 @@ def __getitem__(self, idx):
     elif args.format == "pickle":
         info("Pickle load")
         basedir = os.path.join(
-            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+            os.path.dirname(__file__), "datasets", "%s.pickle" % modelname
         )
         trainset = SimplePickleDataset(
             basedir=basedir, label="trainset", var_config=var_config
diff --git a/examples/open_catalyst_2020/download_dataset.py b/examples/open_catalyst_2020/download_dataset.py
index 99865f3ae..c938a07dc 100644
--- a/examples/open_catalyst_2020/download_dataset.py
+++ b/examples/open_catalyst_2020/download_dataset.py
@@ -140,8 +140,8 @@ def cleanup(filename, dirname):
     parser.add_argument(
         "--data-path",
         type=str,
-        default="./dataset",
-        help="Specify path to save dataset. Defaults to './dataset'",
+        default="./datasets",
+        help="Specify path to save datasets. Defaults to './datasets'",
     )
     args, _ = parser.parse_known_args()
diff --git a/examples/open_catalyst_2020/train.py b/examples/open_catalyst_2020/train.py
index d971fa15d..577892290 100644
--- a/examples/open_catalyst_2020/train.py
+++ b/examples/open_catalyst_2020/train.py
@@ -13,17 +13,20 @@
 torch.manual_seed(random_state)
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 from hydragnn.preprocess.load_data import split_dataset
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import iterate_tqdm, log
+from hydragnn.utils.print.print_utils import iterate_tqdm, log
 
 from utils.atoms_to_graphs import AtomsToGraphs
 from utils.preprocess import write_images_to_adios
@@ -152,7 +155,7 @@ def get(self, idx):
         type=bool,
         default=True,
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -164,14 +167,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -256,7 +259,7 @@ def get(self, idx):
     ## adios
     if args.format == "adios":
         fname = os.path.join(
-            os.path.dirname(__file__), "./dataset/%s.bp" % modelname
+            os.path.dirname(__file__), "./datasets/%s.bp" % modelname
         )
         adwriter = AdiosWriter(fname, comm)
         adwriter.add("trainset", trainset)
@@ -315,7 +318,7 @@ def get(self, idx):
             "ddstore": args.ddstore,
             "ddstore_width": args.ddstore_width,
         }
-        fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname)
+        fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname)
         trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config)
         valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config)
         testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config)
diff --git a/examples/open_catalyst_2020/uncompress.py b/examples/open_catalyst_2020/uncompress.py
index 49f223c81..8f7a92328 100644
--- a/examples/open_catalyst_2020/uncompress.py
+++ b/examples/open_catalyst_2020/uncompress.py
@@ -28,7 +28,7 @@ def decompress_list_of_files(ip_op_pair: Tuple[str, str]) -> None:
 def get_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--ipdir", type=str, help="Path to compressed dataset directory"
+        "--ipdir", type=str, help="Path to compressed datasets directory"
     )
     parser.add_argument(
         "--opdir", type=str, help="Directory path to uncompress files to"
diff --git a/examples/open_catalyst_2020/utils/atoms_to_graphs.py b/examples/open_catalyst_2020/utils/atoms_to_graphs.py
index 10222757c..a0edc97ce 100644
--- a/examples/open_catalyst_2020/utils/atoms_to_graphs.py
+++ b/examples/open_catalyst_2020/utils/atoms_to_graphs.py
@@ -15,7 +15,10 @@
 from torch_geometric.data import Data
 from torch_geometric.transforms import Distance, Spherical, LocalCartesian
 
-from hydragnn.preprocess.utils import RadiusGraph, RadiusGraphPBC
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    RadiusGraph,
+    RadiusGraphPBC,
+)
 
 # transform_coordinates = Spherical(norm=False, cat=False)
 # transform_coordinates = LocalCartesian(norm=False, cat=False)
diff --git a/examples/open_catalyst_2022/train.py b/examples/open_catalyst_2022/train.py
index 80d098852..268539ff3 100644
--- a/examples/open_catalyst_2022/train.py
+++ b/examples/open_catalyst_2022/train.py
@@ -19,19 +19,25 @@
 from torch_geometric.transforms import Distance, Spherical, LocalCartesian
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.model import print_model
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 from hydragnn.preprocess.load_data import split_dataset
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
-from hydragnn.utils.print_utils import iterate_tqdm, log
+from hydragnn.utils.print.print_utils import iterate_tqdm, log
 
-from hydragnn.preprocess.utils import RadiusGraph, RadiusGraphPBC
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    RadiusGraph,
+    RadiusGraphPBC,
+)
 
 from ase.io import read
@@ -40,8 +46,7 @@
 except ImportError:
     pass
 
-import subprocess
-from hydragnn.utils import nsplit
+from hydragnn.utils.distributed import nsplit
 
 
 def info(*args, logtype="info", sep=" "):
@@ -209,7 +214,7 @@ def get(self, idx):
         type=bool,
         default=True,
     )
-    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore datasets")
     parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
     parser.add_argument("--shmem", action="store_true", help="shmem")
     parser.add_argument("--log", help="log name")
@@ -220,14 +225,14 @@ def get(self, idx):
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--adios",
-        help="Adios dataset",
+        help="Adios datasets",
         action="store_const",
         dest="format",
         const="adios",
     )
     group.add_argument(
         "--pickle",
-        help="Pickle dataset",
+        help="Pickle datasets",
         action="store_const",
         dest="format",
         const="pickle",
@@ -240,7 +245,7 @@ def get(self, idx):
     node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"]
     node_feature_dims = [1, 3, 3]
     dirpwd = os.path.dirname(os.path.abspath(__file__))
-    datadir = os.path.join(dirpwd, 
"datasets") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -316,7 +321,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./dataset/%s.bp" % modelname + os.path.dirname(__file__), "./datasets/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -330,7 +335,7 @@ def get(self, idx): ## pickle elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "dataset", "%s.pickle" % modelname + os.path.dirname(__file__), "datasets", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -375,14 +380,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "dataset", "%s.pickle" % modelname + os.path.dirname(__file__), "datasets", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/qm7x/train.py b/examples/qm7x/train.py index 12a176da4..66cde44c4 100644 --- a/examples/qm7x/train.py +++ b/examples/qm7x/train.py @@ -14,14 +14,17 @@ import argparse import hydragnn -from hydragnn.utils.print_utils import iterate_tqdm, log -from hydragnn.utils.time_utils import Timer +from hydragnn.utils.print.print_utils import iterate_tqdm, log +from hydragnn.utils.profiling_and_tracing.time_utils import Timer from hydragnn.utils.distributed import get_device from hydragnn.preprocess.load_data import split_dataset -from hydragnn.utils.distdataset import DistDataset -from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset -from hydragnn.preprocess.utils import gather_deg +from hydragnn.utils.datasets.distdataset import DistDataset +from hydragnn.utils.datasets.pickledataset import ( + SimplePickleWriter, + SimplePickleDataset, +) +from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg import numpy as np @@ -42,8 +45,8 @@ except ImportError: pass -from hydragnn.utils import nsplit -import hydragnn.utils.tracer as tr +from hydragnn.utils.distributed import nsplit +import hydragnn.utils.profiling_and_tracing.tracer as tr # FIXME: this works fine for now because we train on QM7-X molecules # for larger chemical spaces, the following atom representation has to be properly expanded @@ -73,7 +76,7 @@ def info(*args, logtype="info", sep=" "): class QM7XDataset(AbstractBaseDataset): - """QM7-XDataset dataset class""" + """QM7-XDataset datasets class""" def __init__(self, dirpath, var_config, energy_per_atom=True, dist=False): super().__init__() @@ -197,7 +200,7 @@ def hdf5_to_graph(self, fMOL, molid): # check forces values assert self.check_forces_values( forces - ), f"qm7x dataset - molid:{molid} - confid:{confid} - L2-norm of atomic forces exceeds {self.forces_norm_threshold}" + ), f"qm7x datasets - 
molid:{molid} - confid:{confid} - L2-norm of atomic forces exceeds {self.forces_norm_threshold}" if self.energy_per_atom: energy = EPBE0 / natoms @@ -246,7 +249,7 @@ def get(self, idx): default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") + parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -256,14 +259,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios dataset", + help="Adios datasets", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle dataset", + help="Pickle datasets", action="store_const", dest="format", const="pickle", @@ -346,7 +349,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./dataset/%s.bp" % modelname + os.path.dirname(__file__), "./datasets/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -405,7 +408,7 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) diff --git a/examples/qm9/qm9.py b/examples/qm9/qm9.py index 2e6c3da8e..689717374 100644 --- a/examples/qm9/qm9.py +++ b/examples/qm9/qm9.py @@ -53,12 +53,12 @@ def qm9_pre_filter(data): # Enable print to log file. hydragnn.utils.setup_log(log_name) -# Use built-in torch_geometric dataset. +# Use built-in torch_geometric datasets. # Filter function above used to run quick example. # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="dataset/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter + root="datasets/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter ) train, val, test = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False @@ -81,7 +81,7 @@ def qm9_pre_filter(data): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) -# Run training with the given model and qm9 dataset. +# Run training with the given model and qm9 datasets. writer = hydragnn.utils.get_summary_writer(log_name) hydragnn.utils.save_config(config, log_name) diff --git a/examples/qm9_hpo/qm9.py b/examples/qm9_hpo/qm9.py index 61ef2376e..83bdf1a83 100644 --- a/examples/qm9_hpo/qm9.py +++ b/examples/qm9_hpo/qm9.py @@ -75,12 +75,12 @@ def qm9_pre_filter(data): # Enable print to log file. hydragnn.utils.setup_log(log_name) -# Use built-in torch_geometric dataset. +# Use built-in torch_geometric datasets. # Filter function above used to run quick example. # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. 
dataset = torch_geometric.datasets.QM9( - root="dataset/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter + root="datasets/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter ) train, val, test = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False @@ -103,7 +103,7 @@ def qm9_pre_filter(data): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) -# Run training with the given model and qm9 dataset. +# Run training with the given model and qm9 datasets. writer = hydragnn.utils.get_summary_writer(log_name) hydragnn.utils.save_config(config, log_name) diff --git a/examples/qm9_hpo/qm9_deephyper.py b/examples/qm9_hpo/qm9_deephyper.py index cb4019cf4..f8ab91826 100644 --- a/examples/qm9_hpo/qm9_deephyper.py +++ b/examples/qm9_hpo/qm9_deephyper.py @@ -135,12 +135,12 @@ def run(trial): log_name = "qm9" - # Use built-in torch_geometric dataset. + # Use built-in torch_geometric datasets. # Filter function above used to run quick example. # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="dataset/qm9", pre_transform=qm9_pre_transform + root="datasets/qm9", pre_transform=qm9_pre_transform ) trainset, valset, testset = hydragnn.preprocess.split_dataset(dataset, 0.8, False) diff --git a/examples/qm9_hpo/qm9_optuna.py b/examples/qm9_hpo/qm9_optuna.py index 64403ac29..07057bf60 100644 --- a/examples/qm9_hpo/qm9_optuna.py +++ b/examples/qm9_hpo/qm9_optuna.py @@ -171,12 +171,12 @@ def objective(trial): # Enable print to log file. hydragnn.utils.setup_log(log_name) - # Use built-in torch_geometric dataset. + # Use built-in torch_geometric datasets. # Filter function above used to run quick example. # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="dataset/qm9", pre_transform=qm9_pre_transform + root="datasets/qm9", pre_transform=qm9_pre_transform ) trainset, valset, testset = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False diff --git a/hydragnn/__init__.py b/hydragnn/__init__.py index acbfaa786..b008f952e 100644 --- a/hydragnn/__init__.py +++ b/hydragnn/__init__.py @@ -1,3 +1,2 @@ -from . 
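All of the qm9 example scripts above share one loading pattern against the renamed data root; a condensed sketch (the pre-transform here is a stand-in for the ones those scripts define):

    import torch_geometric
    import hydragnn

    def qm9_pre_transform(data):
        # Stand-in: the example scripts define their own pre-transforms.
        return data

    dataset = torch_geometric.datasets.QM9(
        root="datasets/qm9", pre_transform=qm9_pre_transform
    )
    trainset, valset, testset = hydragnn.preprocess.split_dataset(dataset, 0.8, False)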
-from . import preprocess, models, train, postprocess, utils
 from .run_training import run_training
 from .run_prediction import run_prediction
diff --git a/hydragnn/models/Base.py b/hydragnn/models/Base.py
index 2bcb791ba..bb863ec69 100644
--- a/hydragnn/models/Base.py
+++ b/hydragnn/models/Base.py
@@ -264,7 +264,7 @@ def _multihead(self):
             assert (
                 self.num_nodes is not None
             ), "num_nodes must be positive integer for MLP"
-            # """if different graphs in the dataset have different size, one MLP is shared across all nodes """
+            # """if different graphs in the dataset have different sizes, one MLP is shared across all nodes"""
             head_NN = MLPNode(
                 self.hidden_dim,
                 self.head_dims[ihead] * (1 + self.var_output),
diff --git a/hydragnn/preprocess/__init__.py b/hydragnn/preprocess/__init__.py
index c5ce6ac20..ccc0d2bb0 100644
--- a/hydragnn/preprocess/__init__.py
+++ b/hydragnn/preprocess/__init__.py
@@ -1,6 +1,6 @@
 from .dataset_descriptors import AtomFeatures, StructureFeatures
 
-from .utils import (
+from .graph_samples_checks_and_updates import (
     check_if_graph_size_variable,
     check_if_graph_size_variable_dist,
     get_radius_graph,
@@ -10,9 +10,10 @@
     RadiusGraphPBC,
     update_predicted_values,
     update_atom_features,
-    stratified_sampling,
 )
 
+from .stratified_sampling import stratified_sampling
+
 from .load_data import (
     dataset_loading_and_splitting,
     create_dataloaders,
@@ -26,4 +27,3 @@
 )
 from .lsms_raw_dataset_loader import LSMS_RawDataLoader
 from .cfg_raw_dataset_loader import CFG_RawDataLoader
-from .compositional_data_splitting import compositional_stratified_splitting
diff --git a/hydragnn/preprocess/cfg_raw_dataset_loader.py b/hydragnn/preprocess/cfg_raw_dataset_loader.py
index b5043abb1..32f44c867 100644
--- a/hydragnn/preprocess/cfg_raw_dataset_loader.py
+++ b/hydragnn/preprocess/cfg_raw_dataset_loader.py
@@ -12,7 +12,6 @@
 import os
 import numpy as np
 
-import torch
 from torch_geometric.data import Data
 from torch import tensor
 
diff --git a/hydragnn/preprocess/utils.py b/hydragnn/preprocess/graph_samples_checks_and_updates.py
similarity index 85%
rename from hydragnn/preprocess/utils.py
rename to hydragnn/preprocess/graph_samples_checks_and_updates.py
index 3533756d2..b4162d742 100644
--- a/hydragnn/preprocess/utils.py
+++ b/hydragnn/preprocess/graph_samples_checks_and_updates.py
@@ -20,7 +20,7 @@
 from .dataset_descriptors import AtomFeatures
 
-## This function can be slow if dataset is too large. Use with caution.
+## This function can be slow if the dataset is too large. Use with caution.
 ## Recommend to use check_if_graph_size_variable_dist
 def check_if_graph_size_variable(train_loader, val_loader, test_loader):
     backend = os.getenv("HYDRAGNN_AGGR_BACKEND", "torch")
@@ -175,7 +175,7 @@ def __repr__(self) -> str:
 
 def gather_deg(dataset):
-    from hydragnn.utils.print_utils import iterate_tqdm
+    from hydragnn.utils.print.print_utils import iterate_tqdm
 
     backend = os.getenv("HYDRAGNN_AGGR_BACKEND", "torch")
     if backend == "torch":
@@ -197,7 +197,7 @@ def gather_deg(dataset):
 def gather_deg_dist(dataset):
     import torch.distributed as dist
-    from hydragnn.utils.print_utils import iterate_tqdm
+    from hydragnn.utils.print.print_utils import iterate_tqdm
     from hydragnn.utils.distributed import get_device
 
     max_deg = 0
@@ -218,7 +218,7 @@ def gather_deg_dist(dataset):
 def gather_deg_mpi(dataset):
     from mpi4py import MPI
-    from hydragnn.utils.print_utils import iterate_tqdm
+    from hydragnn.utils.print.print_utils import iterate_tqdm
 
     max_deg = 0
     for data in iterate_tqdm(dataset, 2, desc="Degree max"):
@@ -290,47 +290,3 @@ def update_atom_features(atom_features: [AtomFeatures], data: Data):
     """
     feature_indices = [i for i in atom_features]
     data.x = data.x[:, feature_indices]
-
-
-def stratified_sampling(dataset: [Data], subsample_percentage: float, verbosity=0):
-    """Given the dataset and the percentage of data you want to extract from it, method will
-    apply stratified sampling where X is the dataset and Y is are the category values for each datapoint.
-    In the case of the structures dataset where each structure contains 2 types of atoms, the category will
-    be constructed in a way: number of atoms of type 1 + number of protons of type 2 * 100.
-
-    Parameters
-    ----------
-    dataset: [Data]
-        A list of Data objects representing a structure that has atoms.
-    subsample_percentage: float
-        Percentage of the dataset.
-
-    Returns
-    ----------
-    [Data]
-        Subsample of the original dataset constructed using stratified sampling.
-    """
-    dataset_categories = []
-    print_distributed(verbosity, "Computing the categories for the whole dataset.")
-    for data in iterate_tqdm(dataset, verbosity):
-        frequencies = torch.bincount(data.x[:, 0].int())
-        frequencies = sorted(frequencies[frequencies > 0].tolist())
-        category = 0
-        for index, frequency in enumerate(frequencies):
-            category += frequency * (100 ** index)
-        dataset_categories.append(category)
-
-    subsample_indices = []
-    subsample = []
-
-    sss = StratifiedShuffleSplit(
-        n_splits=1, train_size=subsample_percentage, random_state=0
-    )
-
-    for subsample_index, rest_of_data_index in sss.split(dataset, dataset_categories):
-        subsample_indices = subsample_index.tolist()
-
-    for index in subsample_indices:
-        subsample.append(dataset[index])
-
-    return subsample
diff --git a/hydragnn/preprocess/load_data.py b/hydragnn/preprocess/load_data.py
index 606c4a680..90ef91f87 100644
--- a/hydragnn/preprocess/load_data.py
+++ b/hydragnn/preprocess/load_data.py
@@ -16,7 +16,6 @@
 
 import torch
 import torch.distributed as dist
-import torch_geometric
 
 # FIXME: deprecated in torch_geometric 2.0
 try:
@@ -27,23 +26,21 @@
 from hydragnn.preprocess.serialized_dataset_loader import SerializedDataLoader
 from hydragnn.preprocess.lsms_raw_dataset_loader import LSMS_RawDataLoader
 from hydragnn.preprocess.cfg_raw_dataset_loader import CFG_RawDataLoader
-from hydragnn.preprocess.compositional_data_splitting import (
+from hydragnn.utils.datasets.compositional_data_splitting import (
     compositional_stratified_splitting,
 )
 from hydragnn.utils.distributed import get_comm_size_and_rank
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 import pickle
 
-from hydragnn.utils.print_utils import print_master, log
+from hydragnn.utils.print.print_utils import log
 
-from torch_geometric.data import Batch, Dataset
+from torch_geometric.data import Batch
 from torch.utils.data.dataloader import _DatasetKind
 
-from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
 import multiprocessing as mp
 import queue
-import time
-import sys
 import re
 
@@ -211,7 +208,7 @@ def dataset_loading_and_splitting(config: {}):
         if not list(config["Dataset"]["path"].values())[0].endswith(".pkl"):
             transform_raw_data_to_serialized(config["Dataset"])
 
-        ##if total dataset is provided, split the dataset and save them to pkl files and update config with pkl file locations
+        ## if the total dataset is provided, split it, save the splits to pkl files, and update config with the pkl file locations
         if "total" in config["Dataset"]["path"].keys():
             total_to_train_val_test_pkls(config)
 
@@ -374,7 +371,7 @@ def total_to_train_val_test_pkls(config, isdist=False):
         file_dir = config["Dataset"]["path"]["total"]
     else:
         file_dir = f"{os.environ['SERIALIZED_DATA_PATH']}/serialized_dataset/{config['Dataset']['name']}.pkl"
-    # if "total" raw dataset is provided, generate train/val/test pkl files and update config dict.
+    # if a "total" raw dataset is provided, generate train/val/test pkl files and update config dict.
with open(file_dir, "rb") as f: minmax_node_feature = pickle.load(f) minmax_graph_feature = pickle.load(f) diff --git a/hydragnn/preprocess/raw_dataset_loader.py b/hydragnn/preprocess/raw_dataset_loader.py index c0443bf2a..702e0ef92 100644 --- a/hydragnn/preprocess/raw_dataset_loader.py +++ b/hydragnn/preprocess/raw_dataset_loader.py @@ -14,12 +14,11 @@ import pickle import torch -from torch_geometric.data import Data -from torch import tensor from hydragnn.utils.distributed import get_device -from hydragnn.utils.print_utils import log -from hydragnn.utils import nsplit, tensor_divide, comm_reduce +from hydragnn.utils.print.print_utils import log +from hydragnn.utils.distributed import nsplit, comm_reduce +from hydragnn.utils.model.model import tensor_divide import random @@ -38,7 +37,7 @@ class AbstractRawDataLoader: def __init__(self, config, dist=False): """ config: - shows the dataset path the target variables information, e.g, location and dimension, in data file + shows the datasets path the target variables information, e.g, location and dimension, in data file ########### dataset_list: list of datasets read from self.path_dictionary @@ -193,7 +192,7 @@ def scale_features_by_num_nodes(self, dataset): def normalize_dataset(self): - """Performs the normalization on Data objects and returns the normalized dataset.""" + """Performs the normalization on Data objects and returns the normalized datasets.""" num_node_features = len(self.node_feature_dim) num_graph_features = len(self.graph_feature_dim) diff --git a/hydragnn/preprocess/serialized_dataset_loader.py b/hydragnn/preprocess/serialized_dataset_loader.py index bef054edf..3b385f936 100644 --- a/hydragnn/preprocess/serialized_dataset_loader.py +++ b/hydragnn/preprocess/serialized_dataset_loader.py @@ -23,8 +23,8 @@ from hydragnn.preprocess import update_predicted_values, update_atom_features from hydragnn.utils.distributed import get_device -from hydragnn.utils.print_utils import print_distributed, iterate_tqdm -from hydragnn.preprocess.utils import ( +from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm +from hydragnn.preprocess.graph_samples_checks_and_updates import ( get_radius_graph, get_radius_graph_pbc, ) @@ -194,9 +194,9 @@ def load_serialized_data(self, dataset_path: str): return dataset def __stratified_sampling(self, dataset: [Data], subsample_percentage: float): - """Given the dataset and the percentage of data you want to extract from it, method will - apply stratified sampling where X is the dataset and Y is are the category values for each datapoint. - In the case of the structures dataset where each structure contains 2 types of atoms, the category will + """Given the datasets and the percentage of data you want to extract from it, method will + apply stratified sampling where X is the datasets and Y is are the category values for each datapoint. + In the case of the structures datasets where each structure contains 2 types of atoms, the category will be constructed in a way: number of atoms of type 1 + number of protons of type 2 * 100. Parameters @@ -204,16 +204,16 @@ def __stratified_sampling(self, dataset: [Data], subsample_percentage: float): dataset: [Data] A list of Data objects representing a structure that has atoms. subsample_percentage: float - Percentage of the dataset. + Percentage of the datasets. Returns ---------- [Data] - Subsample of the original dataset constructed using stratified sampling. + Subsample of the original datasets constructed using stratified sampling. 
""" dataset_categories = [] print_distributed( - self.verbosity, "Computing the categories for the whole dataset." + self.verbosity, "Computing the categories for the whole datasets." ) for data in iterate_tqdm(dataset, self.verbosity): frequencies = torch.bincount(data.x[:, 0].int()) diff --git a/hydragnn/preprocess/stratified_sampling.py b/hydragnn/preprocess/stratified_sampling.py new file mode 100644 index 000000000..3072ff4d5 --- /dev/null +++ b/hydragnn/preprocess/stratified_sampling.py @@ -0,0 +1,48 @@ +import torch +from torch_geometric.data import Data +from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm +from sklearn.model_selection import StratifiedShuffleSplit + + +def stratified_sampling(dataset: [Data], subsample_percentage: float, verbosity=0): + """Given the datasets and the percentage of data you want to extract from it, method will + apply stratified sampling where X is the datasets and Y is are the category values for each datapoint. + In the case of the structures datasets where each structure contains 2 types of atoms, the category will + be constructed in a way: number of atoms of type 1 + number of protons of type 2 * 100. + + Parameters + ---------- + dataset: [Data] + A list of Data objects representing a structure that has atoms. + subsample_percentage: float + Percentage of the datasets. + + Returns + ---------- + [Data] + Subsample of the original datasets constructed using stratified sampling. + """ + dataset_categories = [] + print_distributed(verbosity, "Computing the categories for the whole datasets.") + for data in iterate_tqdm(dataset, verbosity): + frequencies = torch.bincount(data.x[:, 0].int()) + frequencies = sorted(frequencies[frequencies > 0].tolist()) + category = 0 + for index, frequency in enumerate(frequencies): + category += frequency * (100 ** index) + dataset_categories.append(category) + + subsample_indices = [] + subsample = [] + + sss = StratifiedShuffleSplit( + n_splits=1, train_size=subsample_percentage, random_state=0 + ) + + for subsample_index, rest_of_data_index in sss.split(dataset, dataset_categories): + subsample_indices = subsample_index.tolist() + + for index in subsample_indices: + subsample.append(dataset[index]) + + return subsample diff --git a/hydragnn/run_prediction.py b/hydragnn/run_prediction.py index b1b7bf3de..3d15f26a3 100755 --- a/hydragnn/run_prediction.py +++ b/hydragnn/run_prediction.py @@ -15,7 +15,7 @@ from hydragnn.preprocess.load_data import dataset_loading_and_splitting from hydragnn.utils.distributed import setup_ddp, get_distributed_model from hydragnn.utils.model import load_existing_model -from hydragnn.utils.config_utils import ( +from hydragnn.utils.input_config_parsing.config_utils import ( update_config, get_log_name_config, parse_deepspeed_config, diff --git a/hydragnn/run_training.py b/hydragnn/run_training.py index 035693380..4c17f3e13 100644 --- a/hydragnn/run_training.py +++ b/hydragnn/run_training.py @@ -12,7 +12,6 @@ import os, json from functools import singledispatch -import torch import torch.distributed as dist from torch.optim.lr_scheduler import ReduceLROnPlateau @@ -20,16 +19,16 @@ from hydragnn.utils.distributed import ( setup_ddp, get_distributed_model, - print_peak_memory, ) +from hydragnn.utils.distributed import print_peak_memory from hydragnn.utils.model import ( save_model, get_summary_writer, load_existing_model_config, ) -from hydragnn.utils.print_utils import print_distributed, setup_log -from hydragnn.utils.time_utils import print_timers -from 
diff --git a/hydragnn/run_prediction.py b/hydragnn/run_prediction.py
index b1b7bf3de..3d15f26a3 100755
--- a/hydragnn/run_prediction.py
+++ b/hydragnn/run_prediction.py
@@ -15,7 +15,7 @@
 from hydragnn.preprocess.load_data import dataset_loading_and_splitting
 from hydragnn.utils.distributed import setup_ddp, get_distributed_model
 from hydragnn.utils.model import load_existing_model
-from hydragnn.utils.config_utils import (
+from hydragnn.utils.input_config_parsing.config_utils import (
     update_config,
     get_log_name_config,
     parse_deepspeed_config,
diff --git a/hydragnn/run_training.py b/hydragnn/run_training.py
index 035693380..4c17f3e13 100644
--- a/hydragnn/run_training.py
+++ b/hydragnn/run_training.py
@@ -12,7 +12,6 @@
 import os, json
 from functools import singledispatch
 
-import torch
 import torch.distributed as dist
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 
@@ -20,16 +19,16 @@
 from hydragnn.utils.distributed import (
     setup_ddp,
     get_distributed_model,
-    print_peak_memory,
 )
+from hydragnn.utils.distributed import print_peak_memory
 from hydragnn.utils.model import (
     save_model,
     get_summary_writer,
     load_existing_model_config,
 )
-from hydragnn.utils.print_utils import print_distributed, setup_log
-from hydragnn.utils.time_utils import print_timers
-from hydragnn.utils.config_utils import (
+from hydragnn.utils.print.print_utils import print_distributed, setup_log
+from hydragnn.utils.profiling_and_tracing.time_utils import print_timers
+from hydragnn.utils.input_config_parsing.config_utils import (
     update_config,
     get_log_name_config,
     save_config,
diff --git a/hydragnn/utils/__init__.py b/hydragnn/utils/__init__.py
deleted file mode 100644
index c1e23d2e1..000000000
--- a/hydragnn/utils/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from .print_utils import print_distributed, iterate_tqdm, setup_log
-from .distributed import (
-    get_comm_size_and_rank,
-    get_device_list,
-    get_device,
-    get_device_name,
-    get_device_from_name,
-    is_model_distributed,
-    get_distributed_model,
-    setup_ddp,
-    nsplit,
-    comm_reduce,
-)
-from .model import (
-    save_model,
-    get_summary_writer,
-    unsorted_segment_mean,
-    load_existing_model,
-    load_existing_model_config,
-    loss_function_selection,
-    tensor_divide,
-    EarlyStopping,
-)
-from .time_utils import Timer, print_timers
-from .config_utils import (
-    update_config,
-    update_config_minmax,
-    get_log_name_config,
-    save_config,
-)
-from .deephyper import (
-    master_from_host,
-    read_node_list,
-    create_ds_config,
-    read_job_node_list,
-    create_launch_command,
-)
-
-from .optimizer import select_optimizer
-from .atomicdescriptors import atomicdescriptors
diff --git a/hydragnn/utils/datasets/__init__.py b/hydragnn/utils/datasets/__init__.py
new file mode 100644
index 000000000..8f7028fba
--- /dev/null
+++ b/hydragnn/utils/datasets/__init__.py
@@ -0,0 +1,19 @@
+from .abstractbasedataset import AbstractBaseDataset
+from .abstractrawdataset import AbstractRawDataset
+from .adiosdataset import AdiosDataset, AdiosWriter
+from .cfgdataset import CFGDataset
+from .compositional_data_splitting import (
+    get_keys,
+    get_elements_list,
+    get_max_graph_size,
+    create_dictionary_from_elements_list,
+    create_dataset_categories,
+    duplicate_unique_data_samples,
+    generate_partition,
+    compositional_stratified_splitting,
+)
+from .distdataset import DistDataset
+from .lsmsdataset import LSMSDataset
+from .pickledataset import SimplePickleDataset, SimplePickleWriter
+from .serializeddataset import SerializedDataset, SerializedWriter
+from .xyzdataset import XYZDataset
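With the package __init__ above in place, the dataset classes resolve from a single namespace; a minimal subclass sketch using only the abstract interface shown in this patch (the subclass itself is hypothetical, and it keeps its own sample list rather than assuming any internal storage in the base class):

    from hydragnn.utils.datasets import AbstractBaseDataset

    class ToyDataset(AbstractBaseDataset):
        def __init__(self, samples):
            super().__init__()
            self._samples = list(samples)

        def get(self, idx):
            # Return the data sample at index idx.
            return self._samples[idx]

        def len(self):
            # Total number of samples in the dataset.
            return len(self._samples)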
""" pass diff --git a/hydragnn/utils/abstractrawdataset.py b/hydragnn/utils/datasets/abstractrawdataset.py similarity index 95% rename from hydragnn/utils/abstractrawdataset.py rename to hydragnn/utils/datasets/abstractrawdataset.py index 657863ea4..d81d4bfd5 100644 --- a/hydragnn/utils/abstractrawdataset.py +++ b/hydragnn/utils/datasets/abstractrawdataset.py @@ -3,8 +3,6 @@ import random import torch -from torch import tensor -from torch_geometric.data import Data from torch_geometric.transforms import ( Distance, NormalizeRotation, @@ -12,38 +10,31 @@ PointPairFeatures, ) -from hydragnn.utils import nsplit, tensor_divide, comm_reduce -from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log +from hydragnn.utils.distributed import nsplit, comm_reduce +from hydragnn.utils.model.model import tensor_divide +from hydragnn.utils.print.print_utils import iterate_tqdm, log from hydragnn.utils.distributed import get_device -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset -from hydragnn.preprocess.utils import ( +from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset +from hydragnn.preprocess.graph_samples_checks_and_updates import ( get_radius_graph, get_radius_graph_pbc, - get_radius_graph_config, - get_radius_graph_pbc_config, ) from hydragnn.preprocess import ( - update_predicted_values, - update_atom_features, stratified_sampling, ) -from sklearn.model_selection import StratifiedShuffleSplit - -from hydragnn.preprocess.dataset_descriptors import AtomFeatures - from abc import ABC, abstractmethod class AbstractRawDataset(AbstractBaseDataset, ABC): - """Raw dataset class""" + """Raw datasets class""" def __init__(self, config, dist=False, sampling=None): super().__init__() """ config: - shows the dataset path the target variables information, e.g, location and dimension, in data file + shows the datasets path the target variables information, e.g, location and dimension, in data file ########### dataset_list: list of datasets read from self.path_dictionary @@ -215,7 +206,7 @@ def __load_raw_data(self): def __normalize_dataset(self): - """Performs the normalization on Data objects and returns the normalized dataset.""" + """Performs the normalization on Data objects and returns the normalized datasets.""" num_node_features = len(self.node_feature_dim) num_graph_features = len(self.graph_feature_dim) diff --git a/hydragnn/utils/adiosdataset.py b/hydragnn/utils/datasets/adiosdataset.py similarity index 98% rename from hydragnn/utils/adiosdataset.py rename to hydragnn/utils/datasets/adiosdataset.py index c366cea86..d32661ac1 100644 --- a/hydragnn/utils/adiosdataset.py +++ b/hydragnn/utils/datasets/adiosdataset.py @@ -2,9 +2,8 @@ import pickle import time import os -import glob -from .print_utils import print_distributed, log, log0, iterate_tqdm +from hydragnn.utils.print.print_utils import log, log0, iterate_tqdm import numpy as np @@ -23,10 +22,10 @@ except ImportError: pass -import hydragnn.utils.tracer as tr +import hydragnn.utils.profiling_and_tracing.tracer as tr -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset -from hydragnn.utils import nsplit +from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.distributed import nsplit from hydragnn.preprocess import update_predicted_values, update_atom_features @@ -279,7 +278,7 @@ def save(self): class AdiosDataset(AbstractBaseDataset): - """Adios dataset class""" + """Adios datasets class""" def __init__( self, @@ -306,7 +305,7 @@ 
def __init__( comm: MPI_comm MPI communicator preload: bool, optional - Option to preload all the dataset into a memory + Option to preload all the datasets into a memory shmem: bool, optional Option to use shmem to share data between processes in the same node enable_cache: bool, optional diff --git a/hydragnn/utils/cfgdataset.py b/hydragnn/utils/datasets/cfgdataset.py similarity index 97% rename from hydragnn/utils/cfgdataset.py rename to hydragnn/utils/datasets/cfgdataset.py index 5e7c59e7d..8df40217b 100644 --- a/hydragnn/utils/cfgdataset.py +++ b/hydragnn/utils/datasets/cfgdataset.py @@ -3,7 +3,7 @@ from torch import tensor from torch_geometric.data import Data -from hydragnn.utils.abstractrawdataset import AbstractRawDataset +from hydragnn.utils.datasets.abstractrawdataset import AbstractRawDataset from ase.io.cfg import read_cfg diff --git a/hydragnn/preprocess/compositional_data_splitting.py b/hydragnn/utils/datasets/compositional_data_splitting.py similarity index 94% rename from hydragnn/preprocess/compositional_data_splitting.py rename to hydragnn/utils/datasets/compositional_data_splitting.py index 574c10dcf..4805c1245 100644 --- a/hydragnn/preprocess/compositional_data_splitting.py +++ b/hydragnn/utils/datasets/compositional_data_splitting.py @@ -115,8 +115,8 @@ def generate_partition( def compositional_stratified_splitting(dataset, perc_train): - """Given the dataset and the percentage of data you want to extract from it, method will - apply stratified sampling where X is the dataset and Y is are the category values for each datapoint. + """Given the datasets and the percentage of data you want to extract from it, method will + apply stratified sampling where X is the datasets and Y is are the category values for each datapoint. In the case each structure contains 2 types of atoms, the category will be constructed as such: number of atoms of type 1 + number of atoms of type 2 * 100. Parameters @@ -124,11 +124,11 @@ def compositional_stratified_splitting(dataset, perc_train): dataset: [Data] A list of Data objects representing a structure that has atoms. subsample_percentage: float - Percentage of the dataset. + Percentage of the datasets. Returns ---------- [Data] - Subsample of the original dataset constructed using stratified sampling. + Subsample of the original datasets constructed using stratified sampling. 
""" dataset_categories = create_dataset_categories(dataset) dataset, dataset_categories = duplicate_unique_data_samples( diff --git a/hydragnn/utils/distdataset.py b/hydragnn/utils/datasets/distdataset.py similarity index 95% rename from hydragnn/utils/distdataset.py rename to hydragnn/utils/datasets/distdataset.py index 5732a6cd8..80c282e67 100644 --- a/hydragnn/utils/distdataset.py +++ b/hydragnn/utils/datasets/distdataset.py @@ -4,23 +4,23 @@ import torch import torch_geometric.data -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset try: import pyddstore as dds except ImportError: pass -from hydragnn.utils.print_utils import log, log0 -from hydragnn.utils import nsplit +from hydragnn.utils.print.print_utils import log0 +from hydragnn.utils.distributed import nsplit from hydragnn.preprocess import update_predicted_values, update_atom_features -import hydragnn.utils.tracer as tr +import hydragnn.utils.profiling_and_tracing.tracer as tr from tqdm import tqdm class DistDataset(AbstractBaseDataset): - """Distributed dataset class""" + """Distributed datasets class""" def __init__( self, diff --git a/hydragnn/utils/lsmsdataset.py b/hydragnn/utils/datasets/lsmsdataset.py similarity index 97% rename from hydragnn/utils/lsmsdataset.py rename to hydragnn/utils/datasets/lsmsdataset.py index a1314938d..99a121644 100644 --- a/hydragnn/utils/lsmsdataset.py +++ b/hydragnn/utils/datasets/lsmsdataset.py @@ -1,6 +1,6 @@ from torch import tensor from torch_geometric.data import Data -from hydragnn.utils.abstractrawdataset import AbstractRawDataset +from hydragnn.utils.datasets.abstractrawdataset import AbstractRawDataset class LSMSDataset(AbstractRawDataset): diff --git a/hydragnn/utils/pickledataset.py b/hydragnn/utils/datasets/pickledataset.py similarity index 95% rename from hydragnn/utils/pickledataset.py rename to hydragnn/utils/datasets/pickledataset.py index 8b99f0f9d..48da3d06b 100644 --- a/hydragnn/utils/pickledataset.py +++ b/hydragnn/utils/datasets/pickledataset.py @@ -1,15 +1,14 @@ import os import pickle -import torch from mpi4py import MPI -from .print_utils import print_distributed, log, iterate_tqdm +from hydragnn.utils.print.print_utils import log, iterate_tqdm -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset from hydragnn.preprocess import update_predicted_values, update_atom_features -import hydragnn.utils.tracer as tr +import hydragnn.utils.profiling_and_tracing.tracer as tr class SimplePickleDataset(AbstractBaseDataset): @@ -119,7 +118,7 @@ def __init__( """ Parameters ---------- - dataset: locally owned dataset (should be iterable) + dataset: locally owned datasets (should be iterable) basedir: basedir label: label nmax: nmax in case of subdir diff --git a/hydragnn/utils/serializeddataset.py b/hydragnn/utils/datasets/serializeddataset.py similarity index 86% rename from hydragnn/utils/serializeddataset.py rename to hydragnn/utils/datasets/serializeddataset.py index c469e3cab..70f71076f 100644 --- a/hydragnn/utils/serializeddataset.py +++ b/hydragnn/utils/datasets/serializeddataset.py @@ -1,10 +1,10 @@ import os import pickle -from .print_utils import log +from hydragnn.utils.print.print_utils import log -from hydragnn.utils import get_comm_size_and_rank -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.distributed import get_comm_size_and_rank +from 
diff --git a/hydragnn/utils/distdataset.py b/hydragnn/utils/datasets/distdataset.py
similarity index 95%
rename from hydragnn/utils/distdataset.py
rename to hydragnn/utils/datasets/distdataset.py
index 5732a6cd8..80c282e67 100644
--- a/hydragnn/utils/distdataset.py
+++ b/hydragnn/utils/datasets/distdataset.py
@@ -4,23 +4,23 @@
 import torch
 import torch_geometric.data
 
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
 
 try:
     import pyddstore as dds
 except ImportError:
     pass
 
-from hydragnn.utils.print_utils import log, log0
-from hydragnn.utils import nsplit
+from hydragnn.utils.print.print_utils import log0
+from hydragnn.utils.distributed import nsplit
 from hydragnn.preprocess import update_predicted_values, update_atom_features
 
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
 from tqdm import tqdm
 
 
 class DistDataset(AbstractBaseDataset):
-    """Distributed dataset class"""
+    """Distributed dataset class backed by DDStore"""
 
     def __init__(
         self,
diff --git a/hydragnn/utils/lsmsdataset.py b/hydragnn/utils/datasets/lsmsdataset.py
similarity index 97%
rename from hydragnn/utils/lsmsdataset.py
rename to hydragnn/utils/datasets/lsmsdataset.py
index a1314938d..99a121644 100644
--- a/hydragnn/utils/lsmsdataset.py
+++ b/hydragnn/utils/datasets/lsmsdataset.py
@@ -1,6 +1,6 @@
 from torch import tensor
 from torch_geometric.data import Data
-from hydragnn.utils.abstractrawdataset import AbstractRawDataset
+from hydragnn.utils.datasets.abstractrawdataset import AbstractRawDataset
 
 
 class LSMSDataset(AbstractRawDataset):
diff --git a/hydragnn/utils/pickledataset.py b/hydragnn/utils/datasets/pickledataset.py
similarity index 95%
rename from hydragnn/utils/pickledataset.py
rename to hydragnn/utils/datasets/pickledataset.py
index 8b99f0f9d..48da3d06b 100644
--- a/hydragnn/utils/pickledataset.py
+++ b/hydragnn/utils/datasets/pickledataset.py
@@ -1,15 +1,14 @@
 import os
 import pickle
-import torch
 
 from mpi4py import MPI
 
-from .print_utils import print_distributed, log, iterate_tqdm
+from hydragnn.utils.print.print_utils import log, iterate_tqdm
 
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
 from hydragnn.preprocess import update_predicted_values, update_atom_features
 
-import hydragnn.utils.tracer as tr
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 
 
 class SimplePickleDataset(AbstractBaseDataset):
@@ -119,7 +118,7 @@ def __init__(
         """
         Parameters
         ----------
-        dataset: locally owned dataset (should be iterable)
+        dataset: locally owned shard of the dataset (should be iterable)
         basedir: basedir
         label: label
         nmax: nmax in case of subdir
diff --git a/hydragnn/utils/serializeddataset.py b/hydragnn/utils/datasets/serializeddataset.py
similarity index 86%
rename from hydragnn/utils/serializeddataset.py
rename to hydragnn/utils/datasets/serializeddataset.py
index c469e3cab..70f71076f 100644
--- a/hydragnn/utils/serializeddataset.py
+++ b/hydragnn/utils/datasets/serializeddataset.py
@@ -1,10 +1,10 @@
 import os
 import pickle
 
-from .print_utils import log
+from hydragnn.utils.print.print_utils import log
 
-from hydragnn.utils import get_comm_size_and_rank
-from hydragnn.utils.abstractbasedataset import AbstractBaseDataset
+from hydragnn.utils.distributed import get_comm_size_and_rank
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
 
 
 class SerializedDataset(AbstractBaseDataset):
@@ -15,7 +15,7 @@ def __init__(self, basedir, datasetname, label, dist=False):
         Parameters
         ----------
         basedir: basedir
-        datasetname: dataset name
+        datasetname: name of the dataset
         label: label
         """
         super().__init__()
@@ -62,9 +62,9 @@ def __init__(
         """
         Parameters
         ----------
-        dataset: locally owned dataset (should be iterable)
+        dataset: locally owned shard of the dataset (should be iterable)
         basedir: basedir
-        datasetname: dataset name
+        datasetname: name of the dataset
         label: label
         nmax: nmax in case of subdir
         minmax_node_feature: minmax_node_feature
diff --git a/hydragnn/utils/xyzdataset.py b/hydragnn/utils/datasets/xyzdataset.py
similarity index 95%
rename from hydragnn/utils/xyzdataset.py
rename to hydragnn/utils/datasets/xyzdataset.py
index b7c89be30..e3b57c29b 100644
--- a/hydragnn/utils/xyzdataset.py
+++ b/hydragnn/utils/datasets/xyzdataset.py
@@ -3,9 +3,8 @@
 from torch import tensor
 from torch_geometric.data import Data
 
-from hydragnn.utils.abstractrawdataset import AbstractRawDataset
+from hydragnn.utils.datasets.abstractrawdataset import AbstractRawDataset
 
-from ase.io.cfg import read_cfg
 from ase.io import read
 
 
diff --git a/hydragnn/utils/descriptors_and_embeddings/__init__.py b/hydragnn/utils/descriptors_and_embeddings/__init__.py
new file mode 100644
index 000000000..13b70bc6c
--- /dev/null
+++ b/hydragnn/utils/descriptors_and_embeddings/__init__.py
@@ -0,0 +1,6 @@
+from .atomicdescriptors import atomicdescriptors
+from .smiles_utils import (
+    get_node_attribute_name,
+    generate_graphdata_from_smilestr,
+    generate_graphdata_from_rdkit_molecule,
+)
diff --git a/hydragnn/utils/atomicdescriptors.py b/hydragnn/utils/descriptors_and_embeddings/atomicdescriptors.py
similarity index 100%
rename from hydragnn/utils/atomicdescriptors.py
rename to hydragnn/utils/descriptors_and_embeddings/atomicdescriptors.py
diff --git a/hydragnn/utils/smiles_utils.py b/hydragnn/utils/descriptors_and_embeddings/smiles_utils.py
similarity index 100%
rename from hydragnn/utils/smiles_utils.py
rename to hydragnn/utils/descriptors_and_embeddings/smiles_utils.py
diff --git a/hydragnn/utils/distributed/__init__.py b/hydragnn/utils/distributed/__init__.py
new file mode 100644
index 000000000..063642124
--- /dev/null
+++ b/hydragnn/utils/distributed/__init__.py
@@ -0,0 +1,16 @@
+from .distributed import (
+    get_comm_size_and_rank,
+    get_device_list,
+    get_device,
+    get_device_name,
+    get_device_from_name,
+    is_model_distributed,
+    get_distributed_model,
+    setup_ddp,
+    nsplit,
+    comm_reduce,
+    get_deepspeed_init_args,
+    init_comm_size_and_rank,
+    check_remaining,
+    print_peak_memory,
+)
diff --git a/hydragnn/utils/distributed.py b/hydragnn/utils/distributed/distributed.py
similarity index 99%
rename from hydragnn/utils/distributed.py
rename to hydragnn/utils/distributed/distributed.py
index 50c853776..1cc86d3a4 100644
--- a/hydragnn/utils/distributed.py
+++ b/hydragnn/utils/distributed/distributed.py
@@ -15,14 +15,13 @@
 import torch
 import torch.distributed as dist
 
-from .print_utils import print_distributed
+from hydragnn.utils.print.print_utils import print_distributed
 
 import psutil
 import socket
 from datetime import timedelta
 import time
 import subprocess
-from mpi4py import MPI
 
 deepspeed_available = True
 try:
diff --git a/hydragnn/utils/hpo/__init__.py b/hydragnn/utils/hpo/__init__.py
new file mode 100644
index 000000000..d0cf926bd
--- /dev/null
+++ b/hydragnn/utils/hpo/__init__.py
@@ -0,0 +1,7 @@
+from .deephyper import (
+    master_from_host,
+    read_node_list,
+    create_ds_config,
+    read_job_node_list,
+    create_launch_command,
+)
diff --git a/hydragnn/utils/deephyper.py b/hydragnn/utils/hpo/deephyper.py
similarity index 100%
rename from hydragnn/utils/deephyper.py
rename to hydragnn/utils/hpo/deephyper.py
diff --git a/hydragnn/utils/input_config_parsing/__init__.py b/hydragnn/utils/input_config_parsing/__init__.py
new file mode 100644
index 000000000..50e3e5176
--- /dev/null
+++ b/hydragnn/utils/input_config_parsing/__init__.py
@@ -0,0 +1,6 @@
+from .config_utils import (
+    update_config,
+    update_config_minmax,
+    get_log_name_config,
+    save_config,
+)
diff --git a/hydragnn/utils/config_utils.py b/hydragnn/utils/input_config_parsing/config_utils.py
similarity index 97%
rename from hydragnn/utils/config_utils.py
rename to hydragnn/utils/input_config_parsing/config_utils.py
index 80762cc95..86525c766 100644
--- a/hydragnn/utils/config_utils.py
+++ b/hydragnn/utils/input_config_parsing/config_utils.py
@@ -10,15 +10,14 @@
 ##############################################################################
 import pickle
 import os
-from hydragnn.preprocess.utils import check_if_graph_size_variable, gather_deg
-from hydragnn.utils.model import calculate_PNA_degree
-from hydragnn.utils import get_comm_size_and_rank
-import time
+from hydragnn.preprocess.graph_samples_checks_and_updates import (
+    check_if_graph_size_variable,
+    gather_deg,
+)
+from hydragnn.utils.distributed import get_comm_size_and_rank
 from copy import deepcopy
 import json
-from torch_geometric.utils import degree
 import torch
-import torch.distributed as dist
 
 
 def update_config(config, train_loader, val_loader, test_loader):
@@ -47,7 +46,7 @@ def update_config(config, train_loader, val_loader, test_loader):
     if config["NeuralNetwork"]["Architecture"]["model_type"] == "PNA" or "PNAPlus":
         if hasattr(train_loader.dataset, "pna_deg"):
-            ## Use max neighbours used in the dataset.
+            ## Use the max number of neighbours found in the dataset.
             deg = torch.tensor(train_loader.dataset.pna_deg)
         else:
             deg = gather_deg(train_loader.dataset)
diff --git a/hydragnn/utils/model/__init__.py b/hydragnn/utils/model/__init__.py
new file mode 100644
index 000000000..078ba616b
--- /dev/null
+++ b/hydragnn/utils/model/__init__.py
@@ -0,0 +1,11 @@
+from .model import (
+    save_model,
+    get_summary_writer,
+    unsorted_segment_mean,
+    load_existing_model,
+    load_existing_model_config,
+    loss_function_selection,
+    tensor_divide,
+    EarlyStopping,
+    print_model,
+)
diff --git a/hydragnn/utils/model.py b/hydragnn/utils/model/model.py
similarity index 98%
rename from hydragnn/utils/model.py
rename to hydragnn/utils/model/model.py
index 67253fcae..6b6d3eb56 100644
--- a/hydragnn/utils/model.py
+++ b/hydragnn/utils/model/model.py
@@ -15,9 +15,8 @@
 import torch
 import torch.distributed as dist
 from torch.utils.tensorboard import SummaryWriter
-from torch_geometric.data import Data
 from torch_geometric.utils import degree
-from .print_utils import print_master, iterate_tqdm
+from hydragnn.utils.print.print_utils import print_master, iterate_tqdm
 
 from hydragnn.utils.distributed import (
     get_comm_size_and_rank,
@@ -123,7 +122,7 @@ def load_existing_model(
     model.load_checkpoint(os.path.join(path, model_name), model_name)
 
 
-## This function may cause OOM if dataset is too large
+## This function may cause OOM if the dataset is too large
 ## to fit in a single GPU (i.e., with DDP). Use with caution.
 ## Recommend to use calculate_PNA_degree_dist
 def calculate_PNA_degree(loader, max_neighbours):
diff --git a/hydragnn/utils/optimizer/__init__.py b/hydragnn/utils/optimizer/__init__.py
new file mode 100644
index 000000000..9d9dce433
--- /dev/null
+++ b/hydragnn/utils/optimizer/__init__.py
@@ -0,0 +1 @@
+from .optimizer import select_optimizer
diff --git a/hydragnn/utils/optimizer.py b/hydragnn/utils/optimizer/optimizer.py
similarity index 98%
rename from hydragnn/utils/optimizer.py
rename to hydragnn/utils/optimizer/optimizer.py
index 6950e1146..af2fdcc32 100644
--- a/hydragnn/utils/optimizer.py
+++ b/hydragnn/utils/optimizer/optimizer.py
@@ -1,5 +1,5 @@
 import torch
-from .distributed import get_device_name
+from hydragnn.utils.distributed import get_device_name
 from torch.distributed.optim import ZeroRedundancyOptimizer
 
 deepspeed_available = True
diff --git a/hydragnn/utils/print/__init__.py b/hydragnn/utils/print/__init__.py
new file mode 100644
index 000000000..8093611dc
--- /dev/null
+++ b/hydragnn/utils/print/__init__.py
@@ -0,0 +1 @@
+from .print_utils import print_distributed, iterate_tqdm, setup_log
diff --git a/hydragnn/utils/print_utils.py b/hydragnn/utils/print/print_utils.py
similarity index 95%
rename from hydragnn/utils/print_utils.py
rename to hydragnn/utils/print/print_utils.py
index f01facaf8..eb7329ecd 100644
--- a/hydragnn/utils/print_utils.py
+++ b/hydragnn/utils/print/print_utils.py
@@ -64,7 +64,7 @@ def setup_log(prefix):
     """
     Setup logging to print messages for both screen and file.
     """
-    from .distributed import init_comm_size_and_rank
+    from hydragnn.utils.distributed import init_comm_size_and_rank
 
     world_size, world_rank = init_comm_size_and_rank()
 
@@ -100,7 +100,7 @@ def log(*args, sep=" ", rank=None):
     if rank is None:
         logger.info(sep.join(map(str, args)))
     else:
-        from .distributed import init_comm_size_and_rank
+        from hydragnn.utils.distributed import init_comm_size_and_rank
 
         world_size, world_rank = init_comm_size_and_rank()
         if rank == world_rank:
diff --git a/hydragnn/utils/profiling_and_tracing/__init__.py b/hydragnn/utils/profiling_and_tracing/__init__.py
new file mode 100644
index 000000000..55a3ee102
--- /dev/null
+++ b/hydragnn/utils/profiling_and_tracing/__init__.py
@@ -0,0 +1,3 @@
+from .profile import Profiler, ProfilerActivity
+from .time_utils import Timer, TimerError
+from .tracer import Tracer, GPTLTracer, SCOREPTracer
diff --git a/hydragnn/utils/gptl4py_dummy.py b/hydragnn/utils/profiling_and_tracing/gptl4py_dummy.py
similarity index 97%
rename from hydragnn/utils/gptl4py_dummy.py
rename to hydragnn/utils/profiling_and_tracing/gptl4py_dummy.py
index 67cafe45f..4f8fd91e5 100644
--- a/hydragnn/utils/gptl4py_dummy.py
+++ b/hydragnn/utils/profiling_and_tracing/gptl4py_dummy.py
@@ -12,7 +12,6 @@
 from __future__ import absolute_import
 from functools import wraps
 from contextlib import contextmanager
-import torch.cuda.nvtx as nvtx
 
 
 def initialize():
diff --git a/hydragnn/utils/profile.py b/hydragnn/utils/profiling_and_tracing/profile.py
similarity index 96%
rename from hydragnn/utils/profile.py
rename to hydragnn/utils/profiling_and_tracing/profile.py
index a0e113324..0e15027c8 100644
--- a/hydragnn/utils/profile.py
+++ b/hydragnn/utils/profiling_and_tracing/profile.py
@@ -1,7 +1,7 @@
 import torch
 import contextlib
 from unittest.mock import MagicMock
-from torch.profiler import profile, record_function, ProfilerActivity
+from torch.profiler import ProfilerActivity
 
 from hydragnn.utils.distributed import get_device_name
 
diff --git a/hydragnn/utils/time_utils.py b/hydragnn/utils/profiling_and_tracing/time_utils.py
similarity index 97%
rename from hydragnn/utils/time_utils.py
rename to hydragnn/utils/profiling_and_tracing/time_utils.py
index f30bb9b11..ddd7b0251 100644
--- a/hydragnn/utils/time_utils.py
+++ b/hydragnn/utils/profiling_and_tracing/time_utils.py
@@ -11,8 +11,8 @@
 import time
 import torch
 
-from .distributed import get_comm_size_and_rank, get_device
-from .print_utils import print_distributed
+from hydragnn.utils.distributed import get_comm_size_and_rank, get_device
+from hydragnn.utils.print.print_utils import print_distributed
 
 
 class TimerError(Exception):
diff --git a/hydragnn/utils/tracer.py b/hydragnn/utils/profiling_and_tracing/tracer.py
similarity index 98%
rename from hydragnn/utils/tracer.py
rename to hydragnn/utils/profiling_and_tracing/tracer.py
index df2b5285f..a156f9cf6 100644
--- a/hydragnn/utils/tracer.py
+++ b/hydragnn/utils/profiling_and_tracing/tracer.py
@@ -6,10 +6,6 @@
 from functools import wraps
 from contextlib import contextmanager
 
-import os
-import sys
-from collections import OrderedDict
-
 from abc import ABC, abstractmethod
 import torch
 from mpi4py import MPI
diff --git a/tests/test_datasetclass_inheritance.py b/tests/test_datasetclass_inheritance.py
index ba2e76e28..a742e0eca 100644
--- a/tests/test_datasetclass_inheritance.py
+++ b/tests/test_datasetclass_inheritance.py
@@ -20,10 +20,13 @@
 
 import hydragnn, tests
-from hydragnn.utils.config_utils import get_log_name_config
+from hydragnn.utils.input_config_parsing.config_utils import get_log_name_config
 from hydragnn.utils.model import print_model
-from hydragnn.utils.lsmsdataset import LSMSDataset
-from hydragnn.utils.serializeddataset import SerializedWriter, SerializedDataset
+from hydragnn.utils.datasets.lsmsdataset import LSMSDataset
+from hydragnn.utils.datasets.serializeddataset import (
+    SerializedWriter,
+    SerializedDataset,
+)
 from hydragnn.preprocess.load_data import split_dataset
 
diff --git a/tests/test_deepspeed.py b/tests/test_deepspeed.py
index 2cd89b4e9..f111b282c 100644
--- a/tests/test_deepspeed.py
+++ b/tests/test_deepspeed.py
@@ -1,9 +1,5 @@
-import os, json
 import pytest
 
-import torch
-import random
-import hydragnn
 from tests.test_graphs import unittest_train_model
 
 
diff --git a/tests/test_graphs.py b/tests/test_graphs.py
index 6409a77d4..c6f449dc5 100755
--- a/tests/test_graphs.py
+++ b/tests/test_graphs.py
@@ -18,7 +18,7 @@
 import shutil
 
 import hydragnn, tests
-from hydragnn.utils.config_utils import merge_config
+from hydragnn.utils.input_config_parsing.config_utils import merge_config
 
 
 # Main unit test function called by pytest wrappers.
diff --git a/tests/test_model_loadpred.py b/tests/test_model_loadpred.py index b301962a2..7e13fefda 100755 --- a/tests/test_model_loadpred.py +++ b/tests/test_model_loadpred.py @@ -68,7 +68,7 @@ def pytest_model_loadpred(): # get the directory of trained model log_name = hydragnn.utils.config_utils.get_log_name_config(config) modelfile = os.path.join("./logs/", log_name, log_name + ".pk") - # check if pretrained model and pkl dataset files exists + # check if pretrained model and pkl dataset files exist case_exist = True config_file = os.path.join("./logs/", log_name, "config.json") if not (os.path.isfile(modelfile) and os.path.isfile(config_file)): @@ -79,7 +79,7 @@ def pytest_model_loadpred(): config = json.load(f) for dataset_name, raw_data_path in config["Dataset"]["path"].items(): if not os.path.isfile(raw_data_path): - print(dataset_name, "dataset not found: ", raw_data_path) + print(dataset_name, "dataset not found:", raw_data_path) case_exist = False break if not case_exist: diff --git a/tests/test_periodic_boundary_conditions.py b/tests/test_periodic_boundary_conditions.py index 43d92b46c..a81e0b9f6 100644 --- a/tests/test_periodic_boundary_conditions.py +++ b/tests/test_periodic_boundary_conditions.py @@ -9,12 +9,11 @@ # SPDX-License-Identifier: BSD-3-Clause # ############################################################################## -import sys, os, json, numpy as np -import pytest +import json, numpy as np import torch from torch_geometric.data import Data -from hydragnn.preprocess.utils import ( +from hydragnn.preprocess.graph_samples_checks_and_updates import ( get_radius_graph_config, get_radius_graph_pbc_config, ) diff --git a/tests/test_rotational_invariance.py b/tests/test_rotational_invariance.py index 6dfcf377b..f7ac970a2 100644 --- a/tests/test_rotational_invariance.py +++ b/tests/test_rotational_invariance.py @@ -9,15 +9,15 @@ # SPDX-License-Identifier: BSD-3-Clause # ############################################################################## -import sys, os, json +import json import pytest import torch from torch_geometric.data import Data from torch_geometric.transforms import Distance, NormalizeRotation -from hydragnn.preprocess.utils import get_radius_graph_config +from hydragnn.preprocess.graph_samples_checks_and_updates import get_radius_graph_config -from hydragnn.preprocess.utils import ( +from hydragnn.preprocess.graph_samples_checks_and_updates import ( check_data_samples_equivalence, ) From c7fae5263f0f7f064266116fa927ee80112c7da7 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 16:16:51 -0400 Subject: [PATCH 02/28] bug fixes for tests --- examples/md17/md17.py | 10 +++++----- examples/qm9/qm9.py | 12 ++++++------ hydragnn/__init__.py | 1 + hydragnn/models/Base.py | 2 +- hydragnn/models/EGCLStack.py | 2 +- hydragnn/models/SCFStack.py | 2 +- hydragnn/models/create.py | 3 +-- hydragnn/train/train_validate_test.py | 16 +++++++--------- hydragnn/utils/model/__init__.py | 1 + hydragnn/utils/profiling_and_tracing/__init__.py | 3 --- tests/test_graphs.py | 6 +++--- 11 files changed, 27 insertions(+), 31 deletions(-) diff --git a/examples/md17/md17.py b/examples/md17/md17.py index 9ad829db4..370d20b7b 100644 --- a/examples/md17/md17.py +++ b/examples/md17/md17.py @@ -52,7 +52,7 @@ def md17_pre_filter(data): log_name = "md17_test" # Enable print to log file. -hydragnn.utils.setup_log(log_name) +hydragnn.utils.print.print_utils.setup_log(log_name) # Use built-in torch_geometric datasets.
# Filter function above used to run quick example. @@ -76,13 +76,13 @@ def md17_pre_filter(data): train, val, test, config["NeuralNetwork"]["Training"]["batch_size"] ) -config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) +config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader) model = hydragnn.models.create_model_config( config=config["NeuralNetwork"], verbosity=verbosity, ) -model = hydragnn.utils.get_distributed_model(model, verbosity) +model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -91,8 +91,8 @@ def md17_pre_filter(data): ) # Run training with the given model and qm9 datasets. -writer = hydragnn.utils.get_summary_writer(log_name) -hydragnn.utils.save_config(config, log_name) +writer = hydragnn.utils.model.model.get_summary_writer(log_name) +hydragnn.utils.input_config_parsing.save_config(config, log_name) hydragnn.train.train_validate_test( model, diff --git a/examples/qm9/qm9.py b/examples/qm9/qm9.py index 689717374..be6813f42 100644 --- a/examples/qm9/qm9.py +++ b/examples/qm9/qm9.py @@ -47,11 +47,11 @@ def qm9_pre_filter(data): var_config = config["NeuralNetwork"]["Variables_of_interest"] # Always initialize for multi-rank training. -world_size, world_rank = hydragnn.utils.setup_ddp() +world_size, world_rank = hydragnn.utils.distributed.setup_ddp() log_name = "qm9_test" # Enable print to log file. -hydragnn.utils.setup_log(log_name) +hydragnn.utils.print.print_utils.setup_log(log_name) # Use built-in torch_geometric datasets. # Filter function above used to run quick example. @@ -67,13 +67,13 @@ def qm9_pre_filter(data): train, val, test, config["NeuralNetwork"]["Training"]["batch_size"] ) -config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) +config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader) model = hydragnn.models.create_model_config( config=config["NeuralNetwork"], verbosity=verbosity, ) -model = hydragnn.utils.get_distributed_model(model, verbosity) +model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -82,8 +82,8 @@ def qm9_pre_filter(data): ) # Run training with the given model and qm9 datasets. -writer = hydragnn.utils.get_summary_writer(log_name) -hydragnn.utils.save_config(config, log_name) +writer = hydragnn.utils.model.model.get_summary_writer(log_name) +hydragnn.utils.input_config_parsing.save_config(config, log_name) hydragnn.train.train_validate_test( model, diff --git a/hydragnn/__init__.py b/hydragnn/__init__.py index b008f952e..acbfaa786 100644 --- a/hydragnn/__init__.py +++ b/hydragnn/__init__.py @@ -1,2 +1,3 @@ +from . 
import preprocess, models, train, postprocess, utils from .run_training import run_training from .run_prediction import run_prediction diff --git a/hydragnn/models/Base.py b/hydragnn/models/Base.py index bb863ec69..186d0209f 100644 --- a/hydragnn/models/Base.py +++ b/hydragnn/models/Base.py @@ -19,7 +19,7 @@ from hydragnn.utils.model import activation_function_selection, loss_function_selection import sys from hydragnn.utils.distributed import get_device -from hydragnn.utils.print_utils import print_master +from hydragnn.utils.print.print_utils import print_master import inspect diff --git a/hydragnn/models/EGCLStack.py b/hydragnn/models/EGCLStack.py index 639196555..7109d0fc3 100644 --- a/hydragnn/models/EGCLStack.py +++ b/hydragnn/models/EGCLStack.py @@ -15,7 +15,7 @@ from torch_geometric.nn import Sequential from .Base import Base -from ..utils import unsorted_segment_mean +from hydragnn.utils.model import unsorted_segment_mean class EGCLStack(Base): diff --git a/hydragnn/models/SCFStack.py b/hydragnn/models/SCFStack.py index 7c67cffd5..4f66ae6b5 100644 --- a/hydragnn/models/SCFStack.py +++ b/hydragnn/models/SCFStack.py @@ -26,7 +26,7 @@ from .Base import Base -from ..utils import unsorted_segment_mean +from hydragnn.utils.model import unsorted_segment_mean class SCFStack(Base): diff --git a/hydragnn/models/create.py b/hydragnn/models/create.py index 4e4f6e2f3..c39f1414f 100644 --- a/hydragnn/models/create.py +++ b/hydragnn/models/create.py @@ -25,8 +25,7 @@ from hydragnn.models.EGCLStack import EGCLStack from hydragnn.utils.distributed import get_device -from hydragnn.utils.print_utils import print_distributed -from hydragnn.utils.time_utils import Timer +from hydragnn.utils.profiling_and_tracing.time_utils import Timer def create_model_config( diff --git a/hydragnn/train/train_validate_test.py b/hydragnn/train/train_validate_test.py index 3232da2a8..5195d5134 100644 --- a/hydragnn/train/train_validate_test.py +++ b/hydragnn/train/train_validate_test.py @@ -17,23 +17,21 @@ from hydragnn.preprocess.serialized_dataset_loader import SerializedDataLoader from hydragnn.postprocess.postprocess import output_denormalize from hydragnn.postprocess.visualizer import Visualizer -from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log -from hydragnn.utils.time_utils import Timer -from hydragnn.utils.profile import Profiler -from hydragnn.utils.distributed import get_device, print_peak_memory, check_remaining -from hydragnn.preprocess.load_data import HydraDataLoader -from hydragnn.utils.model import Checkpoint, EarlyStopping +from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm +from hydragnn.utils.profiling_and_tracing.time_utils import Timer +from hydragnn.utils.profiling_and_tracing.profile import Profiler +from hydragnn.utils.distributed import get_device, check_remaining +from hydragnn.utils.model.model import Checkpoint, EarlyStopping import os from torch.profiler import record_function -import contextlib -from unittest.mock import MagicMock + from hydragnn.utils.distributed import get_comm_size_and_rank import torch.distributed as dist import pickle -import hydragnn.utils.tracer as tr +import hydragnn.utils.profiling_and_tracing.tracer as tr import time from mpi4py import MPI diff --git a/hydragnn/utils/model/__init__.py b/hydragnn/utils/model/__init__.py index 078ba616b..0ec38efbd 100644 --- a/hydragnn/utils/model/__init__.py +++ b/hydragnn/utils/model/__init__.py @@ -1,4 +1,5 @@ from .model import ( + activation_function_selection, save_model, 
get_summary_writer, unsorted_segment_mean, diff --git a/hydragnn/utils/profiling_and_tracing/__init__.py b/hydragnn/utils/profiling_and_tracing/__init__.py index 55a3ee102..e69de29bb 100644 --- a/hydragnn/utils/profiling_and_tracing/__init__.py +++ b/hydragnn/utils/profiling_and_tracing/__init__.py @@ -1,3 +0,0 @@ -from .profile import Profiler, ProfilerActivity -from .time_utils import Timer, TimerError -from .tracer import Tracer, GPTLTracer, SCOREPTracer diff --git a/tests/test_graphs.py b/tests/test_graphs.py index c6f449dc5..6222d707b 100755 --- a/tests/test_graphs.py +++ b/tests/test_graphs.py @@ -30,7 +30,7 @@ def unittest_train_model( use_deepspeed=False, overwrite_config=None, ): - world_size, rank = hydragnn.utils.get_comm_size_and_rank() + world_size, rank = hydragnn.utils.distributed.get_comm_size_and_rank() os.environ["SERIALIZED_DATA_PATH"] = os.getcwd() @@ -167,7 +167,7 @@ def unittest_train_model( + " < " + str(thresholds[model_type][0]) ) - hydragnn.utils.print_distributed(verbosity, "head: " + error_str) + hydragnn.utils.print.print_distributed(verbosity, "head: " + error_str) assert ( error_head_mse < thresholds[model_type][0] ), "Head RMSE checking failed for " + str(ihead) @@ -188,7 +188,7 @@ def unittest_train_model( # Check RMSE error error_str = str("{:.6f}".format(error)) + " < " + str(thresholds[model_type][0]) - hydragnn.utils.print_distributed(verbosity, "total: " + error_str) + hydragnn.utils.print.print_distributed(verbosity, "total: " + error_str) assert error < thresholds[model_type][0], "Total RMSE checking failed!" + str(error) From 366f6fe39306c8828b29d4592a0ace94795f0192 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 16:25:10 -0400 Subject: [PATCH 03/28] black formatting fixed --- examples/md17/md17.py | 4 +++- examples/qm9/qm9.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/md17/md17.py b/examples/md17/md17.py index 370d20b7b..0ce3b0b2e 100644 --- a/examples/md17/md17.py +++ b/examples/md17/md17.py @@ -76,7 +76,9 @@ def md17_pre_filter(data): train, val, test, config["NeuralNetwork"]["Training"]["batch_size"] ) -config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader) +config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader +) model = hydragnn.models.create_model_config( config=config["NeuralNetwork"], diff --git a/examples/qm9/qm9.py b/examples/qm9/qm9.py index be6813f42..953402b26 100644 --- a/examples/qm9/qm9.py +++ b/examples/qm9/qm9.py @@ -67,7 +67,9 @@ def qm9_pre_filter(data): train, val, test, config["NeuralNetwork"]["Training"]["batch_size"] ) -config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader) +config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader +) model = hydragnn.models.create_model_config( config=config["NeuralNetwork"], From 9a6ce372caef3e00c0518c4fc3f53b4e33fa0477 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 17:12:12 -0400 Subject: [PATCH 04/28] examples corrected --- examples/alexandria/train.py | 20 +++++++------- examples/ani1_x/train.py | 20 +++++++------- examples/csce/train_gap.py | 26 ++++++++++--------- examples/eam/eam.py | 18 +++++++------ examples/ising_model/train_ising.py | 16 +++++++----- examples/lsms/lsms.py | 18 +++++++------ examples/mptrj/train.py | 23 ++++++++-------- examples/multidataset/train.py | 20 +++++++------- 
examples/multidataset_hpo/gfm.py | 20 +++++++------- examples/ogb/train_gap.py | 26 +++++++++++-------- examples/open_catalyst_2020/train.py | 22 +++++++++------- examples/open_catalyst_2022/train.py | 20 +++++++------- examples/qm7x/train.py | 22 +++++++++------- .../utils/profiling_and_tracing/__init__.py | 1 + tests/test_atomicdescriptors.py | 4 ++- tests/test_optimizer.py | 3 +-- 16 files changed, 154 insertions(+), 125 deletions(-) diff --git a/examples/alexandria/train.py b/examples/alexandria/train.py index 82969111d..417a13385 100644 --- a/examples/alexandria/train.py +++ b/examples/alexandria/train.py @@ -366,7 +366,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. - comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -379,8 +379,8 @@ def get(self, idx): ) log_name = "Alexandria" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -513,11 +513,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -525,7 +527,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -536,7 +538,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -556,8 +558,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/ani1_x/train.py b/examples/ani1_x/train.py index c4c5a25ec..057d57b1a 100644 --- a/examples/ani1_x/train.py +++ b/examples/ani1_x/train.py @@ -246,7 +246,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -259,8 +259,8 @@ def get(self, idx): ) log_name = "ANI1x" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.print_utils.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -393,11 +393,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -405,7 +407,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -416,7 +418,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -436,8 +438,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/csce/train_gap.py b/examples/csce/train_gap.py index 37c89bb0b..6a44b825c 100644 --- a/examples/csce/train_gap.py +++ b/examples/csce/train_gap.py @@ -9,7 +9,9 @@ import logging import sys + from mpi4py import MPI + import argparse import hydragnn @@ -231,7 +233,7 @@ def __getitem__(self, idx): ) = get_node_attribute_name(csce_node_types) ################################################################################################################## # Always initialize for multi-rank training.
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -246,8 +248,8 @@ def __getitem__(self, idx): log_name = "csce_" + inputfilesubstr + "_eV_fullx" if args.log is not None: log_name = args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.print_utils.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -346,9 +348,7 @@ def __getitem__(self, idx): os.environ["HYDRAGNN_USE_ddstore"] = "1" opt = {"preload": False, "shmem": shmem, "ddstore": ddstore} - fname = fname = os.path.join( - os.path.dirname(__file__), "datasets", "csce_gap.bp" - ) + fname = os.path.join(os.path.dirname(__file__), "datasets", "csce_gap.bp") trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm) testset = AdiosDataset(fname, "testset", comm) @@ -390,10 +390,12 @@ def __getitem__(self, idx): ) comm.Barrier() - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) comm.Barrier() timer.stop() @@ -402,7 +404,7 @@ def __getitem__(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -410,7 +412,7 @@ def __getitem__(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -430,8 +432,8 @@ def __getitem__(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.time_utils.print_timers(verbosity) if args.mae: import matplotlib.pyplot as plt diff --git a/examples/eam/eam.py b/examples/eam/eam.py index 7c6340c0c..ab07b8d8b 100644 --- a/examples/eam/eam.py +++ b/examples/eam/eam.py @@ -66,10 +66,10 @@ def info(*args, logtype="info", sep=" "): input_filename = os.path.join(dirpwd, args.inputfile) with open(input_filename, "r") as f: config = json.load(f) - hydragnn.utils.setup_log(get_log_name_config(config)) + hydragnn.utils.print.setup_log(get_log_name_config(config)) ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD ## Set up logging @@ -174,7 +174,9 @@ def info(*args, logtype="info", sep=" "): ) timer.stop() - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) config["NeuralNetwork"]["Variables_of_interest"].pop("minmax_node_feature", None) config["NeuralNetwork"]["Variables_of_interest"].pop("minmax_graph_feature", None) @@ -187,7 +189,7 @@ def info(*args, logtype="info", sep=" "): print_model(model) comm.Barrier() - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -196,12 +198,12 @@ def info(*args, logtype="info", sep=" "): ) log_name = get_log_name_config(config) - writer = hydragnn.utils.get_summary_writer(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) if dist.is_initialized(): dist.barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) hydragnn.train.train_validate_test( model, @@ -217,7 +219,7 @@ def info(*args, logtype="info", sep=" "): create_plots=True, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.time_utils.print_timers(verbosity) sys.exit(0) diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py index de654c17f..491f76982 100644 --- a/examples/ising_model/train_ising.py +++ b/examples/ising_model/train_ising.py @@ -192,10 +192,10 @@ def info(*args, logtype="info", sep=" "): log_name = get_log_name_config(config) if args.log is not None: log_name = args.log - hydragnn.utils.setup_log(log_name) + hydragnn.utils.print.print_utils.setup_log(log_name) ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -367,7 +367,9 @@ def info(*args, logtype="info", sep=" "): config["NeuralNetwork"]["Variables_of_interest"][ "minmax_graph_feature" ] = trainset.minmax_graph_feature - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) del config["NeuralNetwork"]["Variables_of_interest"]["minmax_node_feature"] del config["NeuralNetwork"]["Variables_of_interest"]["minmax_graph_feature"] ## Good to sync with everyone right after DDStore setup @@ -382,7 +384,7 @@ def info(*args, logtype="info", sep=" "): print_model(model) comm.Barrier() - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -390,7 +392,7 @@ def info(*args, logtype="info", sep=" "): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - writer = hydragnn.utils.get_summary_writer(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) if dist.is_initialized(): dist.barrier() @@ -410,8 +412,8 @@ def info(*args, logtype="info", sep=" "): verbosity, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/lsms/lsms.py b/examples/lsms/lsms.py index 77ef10e92..e4157e943 100644 --- a/examples/lsms/lsms.py +++ b/examples/lsms/lsms.py @@ -68,10 +68,10 @@ def info(*args, logtype="info", sep=" "): input_filename = os.path.join(dirpwd, args.inputfile) with open(input_filename, "r") as f: config = json.load(f) - hydragnn.utils.setup_log(get_log_name_config(config)) + hydragnn.utils.print.setup_log(get_log_name_config(config)) ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD ## Set up logging @@ -177,7 +177,9 @@ def info(*args, logtype="info", sep=" "): ) timer.stop() - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) config["NeuralNetwork"]["Variables_of_interest"].pop("minmax_node_feature", None) config["NeuralNetwork"]["Variables_of_interest"].pop("minmax_graph_feature", None) @@ -190,7 +192,7 @@ def info(*args, logtype="info", sep=" "): print_model(model) comm.Barrier() - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -199,12 +201,12 @@ def info(*args, logtype="info", sep=" "): ) log_name = get_log_name_config(config) - writer = hydragnn.utils.get_summary_writer(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) if dist.is_initialized(): dist.barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) hydragnn.train.train_validate_test( model, @@ -220,7 +222,7 @@ def info(*args, logtype="info", sep=" "): create_plots=True, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) sys.exit(0) diff --git a/examples/mptrj/train.py b/examples/mptrj/train.py index 190501e59..188ebfca1 100644 --- a/examples/mptrj/train.py +++ b/examples/mptrj/train.py @@ -1,4 +1,4 @@ -import os, re, json +import os, json import logging import sys from mpi4py import MPI @@ -48,7 +48,6 @@ except ImportError: pass -import subprocess from hydragnn.utils.distributed import nsplit @@ -262,7 +261,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -275,8 +274,8 @@ def get(self, idx): ) log_name = "MPTrj" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.print_utils.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -410,11 +409,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -422,7 +423,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -433,7 +434,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -453,8 +454,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/multidataset/train.py b/examples/multidataset/train.py index 525bd84be..d147fe464 100644 --- a/examples/multidataset/train.py +++ b/examples/multidataset/train.py @@ -122,7 +122,7 @@ def info(*args, logtype="info", sep=" "): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -135,8 +135,8 @@ def info(*args, logtype="info", sep=" "): ) log_name = "GFM" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -346,11 +346,13 @@ def info(*args, logtype="info", sep=" "): test_sampler_shuffle=False, ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -358,7 +360,7 @@ def info(*args, logtype="info", sep=" "): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -369,7 +371,7 @@ def info(*args, logtype="info", sep=" "): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -389,8 +391,8 @@ def info(*args, logtype="info", sep=" "): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/multidataset_hpo/gfm.py b/examples/multidataset_hpo/gfm.py index d2b99fdb1..3752a065a 100644 --- a/examples/multidataset_hpo/gfm.py +++ b/examples/multidataset_hpo/gfm.py @@ -151,7 +151,7 @@ def main(): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -164,8 +164,8 @@ def main(): ) log_name = "gfm_test" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -361,11 +361,13 @@ def main(): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -373,7 +375,7 @@ def main(): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -384,7 +386,7 @@ def main(): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -404,8 +406,8 @@ def main(): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/ogb/train_gap.py b/examples/ogb/train_gap.py index 586927427..ba240da51 100644 --- a/examples/ogb/train_gap.py +++ b/examples/ogb/train_gap.py @@ -93,7 +93,7 @@ def info(*args, logtype="info", sep=" "): getattr(logging, logtype)(sep.join(map(str, args))) -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.datasets import AbstractBaseDataset def smiles_to_graph(datadir, files_list): @@ -315,7 +315,9 @@ def __getitem__(self, idx): var_config["node_feature_dims"] = var_config["input_node_feature_dims"] ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp(use_deepspeed=args.use_deepspeed) + comm_size, rank = hydragnn.utils.distributed.setup_ddp( + use_deepspeed=args.use_deepspeed + ) ################################################################################################################## comm = MPI.COMM_WORLD @@ -328,9 +330,9 @@ def __getitem__(self, idx): ) log_name = "ogb_" + inputfilesubstr - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) modelname = "ogb_" + inputfilesubstr if args.preonly: @@ -446,7 +448,9 @@ def __getitem__(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) timer.stop() model = hydragnn.models.create_model_config( @@ -459,7 +463,7 @@ def __getitem__(self, idx): dist.barrier() if not args.use_deepspeed: - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"][ "learning_rate" @@ -469,7 +473,7 @@ def __getitem__(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -496,7 +500,7 @@ def __getitem__(self, idx): optimizer=optimizer, ) # scheduler is not managed by deepspeed because it is per-epoch instead of per-step - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], use_deepspeed=True ) @@ -517,8 +521,8 @@ def __getitem__(self, idx): use_deepspeed=args.use_deepspeed, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if args.mae: ################################################################################################################## diff --git a/examples/open_catalyst_2020/train.py b/examples/open_catalyst_2020/train.py index 577892290..ad5097a2f 100644 --- a/examples/open_catalyst_2020/train.py +++ b/examples/open_catalyst_2020/train.py @@ -37,7 +37,7 @@ pass import subprocess -from hydragnn.utils import nsplit +from hydragnn.utils.distributed import nsplit ## FIMME torch.backends.cudnn.enabled = False @@ -209,7 +209,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -222,8 +222,8 @@ def get(self, idx): ) log_name = "OC2020" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -363,11 +363,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -375,7 +377,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -386,7 +388,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -406,8 +408,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/open_catalyst_2022/train.py b/examples/open_catalyst_2022/train.py index 268539ff3..06a945034 100644 --- a/examples/open_catalyst_2022/train.py +++ b/examples/open_catalyst_2022/train.py @@ -264,7 +264,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -277,8 +277,8 @@ def get(self, idx): ) log_name = "OC2022" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -425,11 +425,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -437,7 +439,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) # Print details of neural network architecture print_model(model) @@ -448,7 +450,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -468,8 +470,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/examples/qm7x/train.py b/examples/qm7x/train.py index 66cde44c4..56db1d5d3 100644 --- a/examples/qm7x/train.py +++ b/examples/qm7x/train.py @@ -68,7 +68,7 @@ def info(*args, logtype="info", sep=" "): getattr(logging, logtype)(sep.join(map(str, args))) -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.datasets import AbstractBaseDataset # FIXME: this radis cutoff overwrites the radius cutoff currently written in the JSON file create_graph_fromXYZ = RadiusGraph(r=5.0) # radius cutoff in angstrom @@ -305,7 +305,7 @@ def get(self, idx): ################################################################################################################## # Always initialize for multi-rank training. 
- comm_size, rank = hydragnn.utils.setup_ddp() + comm_size, rank = hydragnn.utils.distributed.setup_ddp() ################################################################################################################## comm = MPI.COMM_WORLD @@ -318,8 +318,8 @@ def get(self, idx): ) log_name = "qm7x" if args.log is None else args.log - hydragnn.utils.setup_log(log_name) - writer = hydragnn.utils.get_summary_writer(log_name) + hydragnn.utils.print.setup_log(log_name) + writer = hydragnn.utils.model.get_summary_writer(log_name) log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0) @@ -454,11 +454,13 @@ def get(self, idx): trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] ) - config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + config = hydragnn.utils.input_config_parsing.update_config( + config, train_loader, val_loader, test_loader + ) ## Good to sync with everyone right after DDStore setup comm.Barrier() - hydragnn.utils.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() @@ -466,7 +468,7 @@ def get(self, idx): config=config["NeuralNetwork"], verbosity=verbosity, ) - model = hydragnn.utils.get_distributed_model(model, verbosity) + model = hydragnn.utils.distributed.get_distributed_model(model, verbosity) learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) @@ -474,7 +476,7 @@ def get(self, idx): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) - hydragnn.utils.load_existing_model_config( + hydragnn.utils.model.load_existing_model_config( model, config["NeuralNetwork"]["Training"], optimizer=optimizer ) @@ -494,8 +496,8 @@ def get(self, idx): create_plots=False, ) - hydragnn.utils.save_model(model, optimizer, log_name) - hydragnn.utils.print_timers(verbosity) + hydragnn.utils.model.save_model(model, optimizer, log_name) + hydragnn.utils.profiling_and_tracing.print_timers(verbosity) if tr.has("GPTLTracer"): import gptl4py as gp diff --git a/hydragnn/utils/profiling_and_tracing/__init__.py b/hydragnn/utils/profiling_and_tracing/__init__.py index e69de29bb..25c9d8399 100644 --- a/hydragnn/utils/profiling_and_tracing/__init__.py +++ b/hydragnn/utils/profiling_and_tracing/__init__.py @@ -0,0 +1 @@ +from .time_utils import print_timers diff --git a/tests/test_atomicdescriptors.py b/tests/test_atomicdescriptors.py index 0dc96ed94..3d98b2304 100644 --- a/tests/test_atomicdescriptors.py +++ b/tests/test_atomicdescriptors.py @@ -18,7 +18,9 @@ @pytest.mark.mpi_skip() def pytest_atomicdescriptors(): file_path = os.path.join( - os.path.dirname(__file__), "..", "hydragnn/utils/atomicdescriptors.py" + os.path.dirname(__file__), + "..", + "hydragnn/utils/descriptors_and_embeddings/atomicdescriptors.py", ) return_code = subprocess.call(["python", file_path]) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 656067c46..c70d9de29 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -15,13 +15,12 @@ import shutil import hydragnn, tests -from hydragnn.utils.distributed import get_device_name # Loss function unit test called by pytest wrappers. 
# Note the intent of this test is to make sure all interfaces work - it does not assert anything def unittest_optimizers(optimizer_type, use_zero, ci_input, overwrite_data=False): - world_size, rank = hydragnn.utils.get_comm_size_and_rank() + world_size, rank = hydragnn.utils.distributed.get_comm_size_and_rank() os.environ["SERIALIZED_DATA_PATH"] = os.getcwd() From 7084ee9c8c857af1e4f4b3a4f6a3f5cfdb8940c6 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 17:32:37 -0400 Subject: [PATCH 05/28] test_model_loadpred.py fixed --- tests/test_model_loadpred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_model_loadpred.py b/tests/test_model_loadpred.py index 7e13fefda..481acfc57 100755 --- a/tests/test_model_loadpred.py +++ b/tests/test_model_loadpred.py @@ -29,9 +29,9 @@ def unittest_model_prediction(config): verbosity=config["Verbosity"]["level"], ) - model = hydragnn.utils.get_distributed_model(model, config["Verbosity"]["level"]) + model = hydragnn.utils.distributed.get_distributed_model(model, config["Verbosity"]["level"]) - log_name = hydragnn.utils.config_utils.get_log_name_config(config) + log_name = hydragnn.utils.input_config_parsing.get_log_name_config(config) hydragnn.utils.model.load_existing_model(model, log_name) model.eval() From a8e0da17e74eb77cb7883d77ed46322365af012a Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 19:01:32 -0400 Subject: [PATCH 06/28] black formatting fixed --- tests/test_model_loadpred.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_model_loadpred.py b/tests/test_model_loadpred.py index 481acfc57..9a9ce0b9a 100755 --- a/tests/test_model_loadpred.py +++ b/tests/test_model_loadpred.py @@ -29,7 +29,9 @@ def unittest_model_prediction(config): verbosity=config["Verbosity"]["level"], ) - model = hydragnn.utils.distributed.get_distributed_model(model, config["Verbosity"]["level"]) + model = hydragnn.utils.distributed.get_distributed_model( + model, config["Verbosity"]["level"] + ) log_name = hydragnn.utils.input_config_parsing.get_log_name_config(config) hydragnn.utils.model.load_existing_model(model, log_name) From 526a884d5c51b519e7f506f9be73deabfe24feb5 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 19:50:46 -0400 Subject: [PATCH 07/28] test_loss_and_activation_functions.py fixed --- tests/test_loss_and_activation_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_loss_and_activation_functions.py b/tests/test_loss_and_activation_functions.py index 22ce821a3..e0a73e739 100644 --- a/tests/test_loss_and_activation_functions.py +++ b/tests/test_loss_and_activation_functions.py @@ -22,7 +22,7 @@ def unittest_loss_and_activation_functions( activation_function_type, loss_function_type, ci_input, overwrite_data=False ): - world_size, rank = hydragnn.utils.get_comm_size_and_rank() + world_size, rank = hydragnn.utils.distributed.get_comm_size_and_rank() os.environ["SERIALIZED_DATA_PATH"] = os.getcwd() From 11da4fb4e56f2d7a1371bed75aa0c779c36e4167 Mon Sep 17 00:00:00 2001 From: allaffa Date: Mon, 19 Aug 2024 20:15:49 -0400 Subject: [PATCH 08/28] black formatting fixed --- tests/test_model_loadpred.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_model_loadpred.py b/tests/test_model_loadpred.py index 9a9ce0b9a..a8d650b43 100755 --- a/tests/test_model_loadpred.py +++ b/tests/test_model_loadpred.py @@ -68,7 +68,9 @@ def pytest_model_loadpred(): config = json.load(f) 
config["NeuralNetwork"]["Architecture"]["model_type"] = model_type # get the directory of trained model - log_name = hydragnn.utils.config_utils.get_log_name_config(config) + log_name = hydragnn.utils.input_config_parsing.config_utils.get_log_name_config( + config + ) modelfile = os.path.join("./logs/", log_name, log_name + ".pk") # check if pretrained model and pkl datasets files exists case_exist = True From 0329a1496545663dd10042e2c61a4e9d3564f722 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 12:22:27 -0400 Subject: [PATCH 09/28] reverting inadvertent automated refactoring of dataset forlder into datasets --- examples/alexandria/train.py | 16 +++++++------- examples/ani1_x/train.py | 16 +++++++------- examples/csce/train_gap.py | 32 +++++++++++++--------------- examples/eam/eam.py | 18 +++++++--------- examples/ising_model/train_ising.py | 18 ++++++++-------- examples/lsms/lsms.py | 10 ++++----- examples/md17/md17.py | 4 ++-- examples/mptrj/train.py | 16 +++++++------- examples/multidataset/train.py | 20 ++++++++--------- examples/multidataset_hpo/gfm.py | 20 ++++++++--------- examples/ogb/train_gap.py | 18 ++++++++-------- examples/open_catalyst_2020/train.py | 10 ++++----- examples/open_catalyst_2022/train.py | 16 +++++++------- examples/qm7x/train.py | 12 +++++------ examples/qm9/qm9.py | 2 +- examples/qm9_hpo/qm9.py | 2 +- examples/qm9_hpo/qm9_deephyper.py | 2 +- examples/qm9_hpo/qm9_optuna.py | 2 +- 18 files changed, 114 insertions(+), 120 deletions(-) diff --git a/examples/alexandria/train.py b/examples/alexandria/train.py index 417a13385..0dfbfd56a 100644 --- a/examples/alexandria/train.py +++ b/examples/alexandria/train.py @@ -317,7 +317,7 @@ def get(self, idx): type=bool, default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -327,14 +327,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -347,7 +347,7 @@ def get(self, idx): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -409,7 +409,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -423,7 +423,7 @@ def get(self, idx): ## pickle elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) 
attrs = dict() attrs["pna_deg"] = deg @@ -468,14 +468,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/ani1_x/train.py b/examples/ani1_x/train.py index 057d57b1a..68cb00f35 100644 --- a/examples/ani1_x/train.py +++ b/examples/ani1_x/train.py @@ -197,7 +197,7 @@ def get(self, idx): type=bool, default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -207,14 +207,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle ", action="store_const", dest="format", const="pickle", @@ -227,7 +227,7 @@ def get(self, idx): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -289,7 +289,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -303,7 +303,7 @@ def get(self, idx): ## pickle elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -348,14 +348,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + 
os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/csce/train_gap.py b/examples/csce/train_gap.py index 6a44b825c..f4d2cacba 100644 --- a/examples/csce/train_gap.py +++ b/examples/csce/train_gap.py @@ -166,42 +166,42 @@ def __getitem__(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", ) group.add_argument( - "--csv", help="CSV datasets", action="store_const", dest="format", const="csv" + "--csv", help="CSV dataset", action="store_const", dest="format", const="csv" ) parser.set_defaults(format="adios") group1 = parser.add_mutually_exclusive_group() group1.add_argument( "--shmem", - help="shmem datasets", + help="shmem dataset", action="store_const", - dest="datasets", + dest="dataset", const="shmem", ) group1.add_argument( "--ddstore", - help="ddstore datasets", + help="ddstore dataset", action="store_const", - dest="datasets", + dest="dataset", const="ddstore", ) group1.add_argument( "--simple", - help="no special datasets", + help="no special dataset", action="store_const", - dest="datasets", + dest="dataset", const="simple", ) parser.set_defaults(dataset="simple") @@ -211,7 +211,7 @@ def __getitem__(self, idx): graph_feature_names = ["GAP"] graph_feature_dim = [1] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datafile = os.path.join(dirpwd, "datasets/csce_gap_synth.csv") + datafile = os.path.join(dirpwd, "dataset/csce_gap_synth.csv") ################################################################################################################## inputfilesubstr = args.inputfilesubstr input_filename = os.path.join(dirpwd, "csce_" + inputfilesubstr + ".json") @@ -298,7 +298,7 @@ def __getitem__(self, idx): config["pna_deg"] = deg ## pickle - basedir = os.path.join(os.path.dirname(__file__), "datasets", "pickle") + basedir = os.path.join(os.path.dirname(__file__), "dataset", "pickle") attrs = dict() attrs["pna_deg"] = deg SimplePickleWriter( @@ -321,7 +321,7 @@ def __getitem__(self, idx): use_subdir=True, ) - fname = os.path.join(os.path.dirname(__file__), "datasets", "csce_gap.bp") + fname = os.path.join(os.path.dirname(__file__), "dataset", "csce_gap.bp") adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) adwriter.add("valset", valset) @@ -348,21 +348,19 @@ def __getitem__(self, idx): os.environ["HYDRAGNN_USE_ddstore"] = "1" opt = {"preload": False, "shmem": shmem, "ddstore": ddstore} - fname = os.path.join(os.path.dirname(__file__), "datasets", "csce_gap.bp") + fname = os.path.join(os.path.dirname(__file__), "dataset", "csce_gap.bp") trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm) testset = AdiosDataset(fname, "testset", comm) comm.Barrier() elif args.format == "csv": - fname = os.path.join( - os.path.dirname(__file__), "datasets", "csce_gap_synth.csv" - ) + fname = os.path.join(os.path.dirname(__file__), "dataset", "csce_gap_synth.csv") fact = CSCEDatasetFactory(fname, args.sampling, var_config=var_config) trainset = CSCEDataset(fact, "trainset") valset = CSCEDataset(fact, "valset") testset = CSCEDataset(fact, "testset") elif args.format == "pickle": - basedir = os.path.join(os.path.dirname(__file__), 
"datasets", "pickle") + basedir = os.path.join(os.path.dirname(__file__), "dataset", "pickle") trainset = SimplePickleDataset(basedir, "trainset") valset = SimplePickleDataset(basedir, "valset") testset = SimplePickleDataset(basedir, "testset") diff --git a/examples/eam/eam.py b/examples/eam/eam.py index ab07b8d8b..4de2127b6 100644 --- a/examples/eam/eam.py +++ b/examples/eam/eam.py @@ -46,14 +46,14 @@ def info(*args, logtype="info", sep=" "): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -79,9 +79,9 @@ def info(*args, logtype="info", sep=" "): datefmt="%H:%M:%S", ) - os.environ["SERIALIZED_DATA_PATH"] = dirpwd + "/datasets" + os.environ["SERIALIZED_DATA_PATH"] = dirpwd + "/dataset" datasetname = config["Dataset"]["name"] - fname_adios = dirpwd + "/datasets/%s.bp" % (datasetname) + fname_adios = dirpwd + "/dataset/%s.bp" % (datasetname) config["Dataset"]["name"] = "%s_%d" % (datasetname, rank) if not args.loadexistingsplit: total = CFGDataset(config) @@ -95,7 +95,7 @@ def info(*args, logtype="info", sep=" "): if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % datasetname + os.path.dirname(__file__), "./dataset/%s.bp" % datasetname ) adwriter = AdiosWriter(fname, MPI.COMM_SELF) adwriter.add("trainset", trainset) @@ -106,7 +106,7 @@ def info(*args, logtype="info", sep=" "): adwriter.save() elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "datasets", "serialized_dataset" + os.path.dirname(__file__), "dataset", "serialized_dataset" ) SerializedWriter( trainset, @@ -140,16 +140,14 @@ def info(*args, logtype="info", sep=" "): "preload": True, "shmem": False, } - fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % datasetname - ) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % datasetname) trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "serialized_dataset" + os.path.dirname(__file__), "dataset", "serialized_dataset" ) trainset = SerializedDataset(basedir, datasetname, "trainset") valset = SerializedDataset(basedir, datasetname, "valset") diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py index 491f76982..c2279a627 100644 --- a/examples/ising_model/train_ising.py +++ b/examples/ising_model/train_ising.py @@ -93,7 +93,7 @@ def create_dataset_mpi( os.makedirs(subdir, exist_ok=True) for num_downs in iterate_tqdm( - range(rx.start, rx.stop), verbosity_level=2, desc="Creating datasets" + range(rx.start, rx.stop), verbosity_level=2, desc="Creating dataset" ): prefix = "output_%d_" % num_downs subdir = os.path.join(dir, str(num_downs)) @@ -162,21 +162,21 @@ def info(*args, logtype="info", sep=" "): ) parser.add_argument("--seed", type=int, help="seed", default=43) parser.add_argument("--sampling", type=float, help="sampling ratio", default=None) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, 
help="ddstore width", default=None) parser.add_argument("--log", help="log name") parser.add_argument("--everyone", action="store_true", help="gptimer") group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -231,7 +231,7 @@ def info(*args, logtype="info", sep=" "): 4. Save as Adios file in parallel """ sys.setrecursionlimit(1000000) - dir = os.path.join(os.path.dirname(__file__), "./datasets/%s" % modelname) + dir = os.path.join(os.path.dirname(__file__), "./dataset/%s" % modelname) if rank == 0: if os.path.exists(dir): shutil.rmtree(dir) @@ -270,7 +270,7 @@ def info(*args, logtype="info", sep=" "): config["pna_deg"] = deg basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["minmax_node_feature"] = total.minmax_node_feature @@ -296,7 +296,7 @@ def info(*args, logtype="info", sep=" "): use_subdir=True, ) - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) adwriter.add("valset", valset) @@ -321,14 +321,14 @@ def info(*args, logtype="info", sep=" "): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset(basedir, "trainset") valset = SimplePickleDataset(basedir, "valset") diff --git a/examples/lsms/lsms.py b/examples/lsms/lsms.py index e4157e943..8a64e353c 100644 --- a/examples/lsms/lsms.py +++ b/examples/lsms/lsms.py @@ -48,14 +48,14 @@ def info(*args, logtype="info", sep=" "): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -98,7 +98,7 @@ def info(*args, logtype="info", sep=" "): if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % datasetname + os.path.dirname(__file__), "./dataset/%s.bp" % datasetname ) adwriter = AdiosWriter(fname, MPI.COMM_SELF) adwriter.add("trainset", trainset) @@ -143,9 +143,7 @@ def info(*args, logtype="info", sep=" "): "preload": True, "shmem": False, } - fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % datasetname - ) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % datasetname) trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) diff --git a/examples/md17/md17.py 
b/examples/md17/md17.py index 0ce3b0b2e..83695df9c 100644 --- a/examples/md17/md17.py +++ b/examples/md17/md17.py @@ -64,7 +64,7 @@ def md17_pre_filter(data): torch_geometric.datasets.MD17.file_names["uracil"] = "md17_uracil.npz" dataset = torch_geometric.datasets.MD17( - root="datasets/md17", + root="dataset/md17", name="uracil", pre_transform=md17_pre_transform, pre_filter=md17_pre_filter, @@ -92,7 +92,7 @@ def md17_pre_filter(data): optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 ) -# Run training with the given model and qm9 datasets. +# Run training with the given model and md17 dataset. writer = hydragnn.utils.model.model.get_summary_writer(log_name) hydragnn.utils.input_config_parsing.save_config(config, log_name) diff --git a/examples/mptrj/train.py b/examples/mptrj/train.py index 188ebfca1..8dee83db6 100644 --- a/examples/mptrj/train.py +++ b/examples/mptrj/train.py @@ -207,7 +207,7 @@ def get(self, idx): type=bool, default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -222,14 +222,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -242,7 +242,7 @@ def get(self, idx): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -305,7 +305,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -319,7 +319,7 @@ def get(self, idx): ## pickle elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -364,14 +364,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( 
basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/multidataset/train.py b/examples/multidataset/train.py index d147fe464..adc008f01 100644 --- a/examples/multidataset/train.py +++ b/examples/multidataset/train.py @@ -46,7 +46,7 @@ def info(*args, logtype="info", sep=" "): parser.add_argument( "--inputfile", help="input file", type=str, default="gfm_multitasking.json" ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -73,21 +73,21 @@ def info(*args, logtype="info", sep=" "): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", ) group.add_argument( "--multi", - help="Multi datasets", + help="Multi dataset", action="store_const", dest="format", const="multi", @@ -100,7 +100,7 @@ def info(*args, logtype="info", sep=" "): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -156,14 +156,14 @@ def info(*args, logtype="info", sep=" "): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config @@ -187,14 +187,14 @@ def info(*args, logtype="info", sep=" "): trainset.pna_deg = pna_deg elif args.format == "multi": ## Reading multiple datasets, which requires the following arguments: - ## --multi_model_list: the list datasets/model names + ## --multi_model_list: the list dataset/model names modellist = args.multi_model_list.split(",") if rank == 0: ndata_list = list() pna_deg_list = list() for model in modellist: fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % model + os.path.dirname(__file__), "./dataset/%s.bp" % model ) with ad2.open(fname, "r", MPI.COMM_SELF) as f: f.__next__() @@ -259,7 +259,7 @@ def info(*args, logtype="info", sep=" "): "pos", "y", ] - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % mymodel) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % mymodel) trainset = 
AdiosDataset( fname, "trainset", diff --git a/examples/multidataset_hpo/gfm.py b/examples/multidataset_hpo/gfm.py index 3752a065a..daf2bc7f0 100644 --- a/examples/multidataset_hpo/gfm.py +++ b/examples/multidataset_hpo/gfm.py @@ -53,7 +53,7 @@ def main(): parser.add_argument("--num_headlayers", type=int, help="num_headlayers", default=2) parser.add_argument("--dim_headlayers", type=int, help="dim_headlayers", default=10) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name", default="gfm_test") @@ -74,21 +74,21 @@ def main(): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", ) group.add_argument( "--multi", - help="Multi datasets", + help="Multi dataset", action="store_const", dest="format", const="multi", @@ -102,7 +102,7 @@ def main(): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -185,14 +185,14 @@ def main(): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config @@ -216,14 +216,14 @@ def main(): trainset.pna_deg = pna_deg elif args.format == "multi": ## Reading multiple datasets, which requires the following arguments: - ## --multi_model_list: the list datasets/model names + ## --multi_model_list: the list dataset/model names modellist = args.multi_model_list.split(",") if rank == 0: ndata_list = list() pna_deg_list = list() for model in modellist: fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % model + os.path.dirname(__file__), "./dataset/%s.bp" % model ) with ad2.open(fname, "r", MPI.COMM_SELF) as f: f.__next__() @@ -288,7 +288,7 @@ def main(): "pos", "y", ] - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % mymodel) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % mymodel) trainset = AdiosDataset( fname, "trainset", diff --git a/examples/ogb/train_gap.py b/examples/ogb/train_gap.py index 
ba240da51..54f76e21a 100644 --- a/examples/ogb/train_gap.py +++ b/examples/ogb/train_gap.py @@ -265,20 +265,20 @@ def __getitem__(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", ) group.add_argument( - "--csv", help="CSV datasets", action="store_const", dest="format", const="csv" + "--csv", help="CSV dataset", action="store_const", dest="format", const="csv" ) parser.add_argument( "--use_deepspeed", @@ -292,7 +292,7 @@ def __getitem__(self, idx): graph_feature_names = ["GAP"] graph_feature_dim = [1] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets/") + datadir = os.path.join(dirpwd, "dataset/") ################################################################################################################## inputfilesubstr = args.inputfilesubstr input_filename = os.path.join(dirpwd, "ogb_" + inputfilesubstr + ".json") @@ -362,7 +362,7 @@ def __getitem__(self, idx): ## pickle basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -393,7 +393,7 @@ def __getitem__(self, idx): ) if args.format == "adios": - fname = os.path.join(os.path.dirname(__file__), "datasets", "ogb_gap.bp") + fname = os.path.join(os.path.dirname(__file__), "dataset", "ogb_gap.bp") adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) adwriter.add("valset", valset) @@ -409,12 +409,12 @@ def __getitem__(self, idx): opt = {"preload": True, "shmem": False} if args.shmem: opt = {"preload": False, "shmem": True} - fname = os.path.join(os.path.dirname(__file__), "datasets", "ogb_gap.bp") + fname = os.path.join(os.path.dirname(__file__), "dataset", "ogb_gap.bp") trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) elif args.format == "csv": - fname = os.path.join(os.path.dirname(__file__), "datasets", "pcqm4m_gap.csv") + fname = os.path.join(os.path.dirname(__file__), "dataset", "pcqm4m_gap.csv") fact = OGBRawDatasetFactory( fname, var_config=var_config, sampling=args.sampling ) @@ -424,7 +424,7 @@ def __getitem__(self, idx): elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/open_catalyst_2020/train.py b/examples/open_catalyst_2020/train.py index ad5097a2f..c920d9002 100644 --- a/examples/open_catalyst_2020/train.py +++ b/examples/open_catalyst_2020/train.py @@ -155,7 +155,7 @@ def get(self, idx): type=bool, default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -167,14 +167,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( 
"--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -259,7 +259,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -318,7 +318,7 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) diff --git a/examples/open_catalyst_2022/train.py b/examples/open_catalyst_2022/train.py index 06a945034..ea109222a 100644 --- a/examples/open_catalyst_2022/train.py +++ b/examples/open_catalyst_2022/train.py @@ -214,7 +214,7 @@ def get(self, idx): type=bool, default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -225,14 +225,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -245,7 +245,7 @@ def get(self, idx): node_feature_names = ["atomic_number", "cartesian_coordinates", "forces"] node_feature_dims = [1, 3, 3] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datadir = os.path.join(dirpwd, "datasets") + datadir = os.path.join(dirpwd, "dataset") ################################################################################################################## input_filename = os.path.join(dirpwd, args.inputfile) ################################################################################################################## @@ -321,7 +321,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -335,7 +335,7 @@ def get(self, idx): ## pickle elif args.format == "pickle": basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -380,14 +380,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), ".//%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, 
var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset( basedir=basedir, label="trainset", var_config=var_config diff --git a/examples/qm7x/train.py b/examples/qm7x/train.py index 56db1d5d3..2a72c815f 100644 --- a/examples/qm7x/train.py +++ b/examples/qm7x/train.py @@ -200,7 +200,7 @@ def hdf5_to_graph(self, fMOL, molid): # check forces values assert self.check_forces_values( forces - ), f"qm7x datasets - molid:{molid} - confid:{confid} - L2-norm of atomic forces exceeds {self.forces_norm_threshold}" + ), f"qm7x dataset - molid:{molid} - confid:{confid} - L2-norm of atomic forces exceeds {self.forces_norm_threshold}" if self.energy_per_atom: energy = EPBE0 / natoms @@ -249,7 +249,7 @@ def get(self, idx): default=True, ) - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -259,14 +259,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -349,7 +349,7 @@ def get(self, idx): ## adios if args.format == "adios": fname = os.path.join( - os.path.dirname(__file__), "./datasets/%s.bp" % modelname + os.path.dirname(__file__), "./dataset/%s.bp" % modelname ) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) @@ -408,7 +408,7 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt, var_config=var_config) valset = AdiosDataset(fname, "valset", comm, **opt, var_config=var_config) testset = AdiosDataset(fname, "testset", comm, **opt, var_config=var_config) diff --git a/examples/qm9/qm9.py b/examples/qm9/qm9.py index 953402b26..1be651f97 100644 --- a/examples/qm9/qm9.py +++ b/examples/qm9/qm9.py @@ -58,7 +58,7 @@ def qm9_pre_filter(data): # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="datasets/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter + root="dataset/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter ) train, val, test = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False diff --git a/examples/qm9_hpo/qm9.py b/examples/qm9_hpo/qm9.py index 83bdf1a83..95ace3382 100644 --- a/examples/qm9_hpo/qm9.py +++ b/examples/qm9_hpo/qm9.py @@ -80,7 +80,7 @@ def qm9_pre_filter(data): # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. 
dataset = torch_geometric.datasets.QM9( - root="datasets/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter + root="dataset/qm9", pre_transform=qm9_pre_transform, pre_filter=qm9_pre_filter ) train, val, test = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False ) diff --git a/examples/qm9_hpo/qm9_deephyper.py b/examples/qm9_hpo/qm9_deephyper.py index f8ab91826..51031c27a 100644 --- a/examples/qm9_hpo/qm9_deephyper.py +++ b/examples/qm9_hpo/qm9_deephyper.py @@ -140,7 +140,7 @@ def run(trial): # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="datasets/qm9", pre_transform=qm9_pre_transform + root="dataset/qm9", pre_transform=qm9_pre_transform ) trainset, valset, testset = hydragnn.preprocess.split_dataset(dataset, 0.8, False) diff --git a/examples/qm9_hpo/qm9_optuna.py b/examples/qm9_hpo/qm9_optuna.py index 07057bf60..0b57f9e83 100644 --- a/examples/qm9_hpo/qm9_optuna.py +++ b/examples/qm9_hpo/qm9_optuna.py @@ -176,7 +176,7 @@ def objective(trial): # NOTE: data is moved to the device in the pre-transform. # NOTE: transforms/filters will NOT be re-run unless the qm9/processed/ directory is removed. dataset = torch_geometric.datasets.QM9( - root="datasets/qm9", pre_transform=qm9_pre_transform + root="dataset/qm9", pre_transform=qm9_pre_transform ) trainset, valset, testset = hydragnn.preprocess.split_dataset( dataset, config["NeuralNetwork"]["Training"]["perc_train"], False From 4b5b98edee7b01089ea06300369624ab04b95a69 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 12:23:43 -0400 Subject: [PATCH 10/28] reverting inadvertent automated refactoring of dataset folder into datasets --- examples/alexandria/find_json_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/alexandria/find_json_files.py b/examples/alexandria/find_json_files.py index acbaee0ef..0801efb5e 100644 --- a/examples/alexandria/find_json_files.py +++ b/examples/alexandria/find_json_files.py @@ -24,7 +24,7 @@ def find_json_files(url): url_root = "https://alexandria.icams.rub.de/data" # Replace with the actual URL -dirpath = "datasets/compressed_data" +dirpath = "dataset/compressed_data" if os.path.exists(dirpath) and os.path.isdir(dirpath): shutil.rmtree(dirpath) From d963e81e484efe1d9e4d4a0cdfbc004c5ead8593 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 12:25:09 -0400 Subject: [PATCH 11/28] reverting inadvertent automated refactoring of dataset folder into datasets --- examples/alexandria/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/alexandria/train.py b/examples/alexandria/train.py index 0dfbfd56a..7382d2bb8 100644 --- a/examples/alexandria/train.py +++ b/examples/alexandria/train.py @@ -250,7 +250,7 @@ def get_magmoms_array_from_structure(structure): def process_file_content(self, filepath): """ - Download a file from a datasets of the Alexandria database with the respective index + Download a file from a dataset of the Alexandria database with the respective index and write it to the LMDB file with the respective index.
Parameters From dfd3942576ae22d24eb42676c17d4457908971ad Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 12:26:31 -0400 Subject: [PATCH 12/28] reverting inadvertent automated refactoring of dataset folder into datasets --- examples/ani1_x/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ani1_x/train.py b/examples/ani1_x/train.py index 68cb00f35..58d6cb93b 100644 --- a/examples/ani1_x/train.py +++ b/examples/ani1_x/train.py @@ -214,7 +214,7 @@ def get(self, idx): ) group.add_argument( "--pickle", - help="Pickle ", + help="Pickle dataset", action="store_const", dest="format", const="pickle", From 3d737919bc78ab3b87d815f22941da4706d9c181 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 12:29:15 -0400 Subject: [PATCH 13/28] reverting inadvertent automated refactoring of hydragnn into hhydragnn package --- examples/ani1_x/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ani1_x/train.py b/examples/ani1_x/train.py index 58d6cb93b..100ad56d2 100644 --- a/examples/ani1_x/train.py +++ b/examples/ani1_x/train.py @@ -399,7 +399,7 @@ def get(self, idx): ## Good to sync with everyone right after DDStore setup comm.Barrier() - hhydragnn.utils.input_config_parsing.save_config(config, log_name) + hydragnn.utils.input_config_parsing.save_config(config, log_name) timer.stop() From 708e393b37010c1e86a2feb02d1d8dce741ea42c Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 13:09:37 -0400 Subject: [PATCH 14/28] reverting inadvertent automated refactoring of dataset folder into datasets --- .../train_discrete_uv_spectrum.py | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py index 2540893ae..11fbefac3 100644 --- a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py +++ b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py @@ -5,30 +5,25 @@ import os, json import random -import pickle import logging import sys -from tqdm import tqdm from mpi4py import MPI -from itertools import chain import argparse -import time from rdkit.Chem.rdmolfiles import MolFromPDBFile import hydragnn -from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log -from hydragnn.utils.time_utils import Timer -from hydragnn.utils.pickledataset import SimplePickleDataset -from hydragnn.utils.smiles_utils import ( +from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm, log +from hydragnn.utils.profiling_and_tracing.time_utils import Timer +from hydragnn.utils.descriptors_and_embeddings.smiles_utils import ( get_node_attribute_name, generate_graphdata_from_rdkit_molecule, ) from hydragnn.utils.distributed import get_device from hydragnn.preprocess.load_data import split_dataset -from hydragnn.utils.distdataset import DistDataset -from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset +from hydragnn.utils.datasets.distdataset import DistDataset +from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg import numpy as np @@ -44,9 +39,9 @@ import warnings -from hydragnn.utils import nsplit -import hydragnn.utils.tracer as tr -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.distributed import nsplit +import hydragnn.utils.profiling_and_tracing.tracer as tr +from
hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset # FIXME: this works fine for now because we train on GDB-9 molecules # for larger chemical spaces, the following atom representation has to be properly expanded @@ -58,6 +53,7 @@ def info(*args, logtype="info", sep=" "): def dftb_to_graph(moldir, dftb_node_types, var_config): + pdb_filename = os.path.join(moldir, "smiles.pdb") mol = MolFromPDBFile( pdb_filename, sanitize=False, proximityBonding=True, removeHs=True @@ -75,7 +71,7 @@ def dftb_to_graph(moldir, dftb_node_types, var_config): class DFTBDataset(AbstractBaseDataset): - """DFTBDataset datasets class""" + """DFTBDataset dataset class""" def __init__(self, dirpath, dftb_node_types, var_config, dist=False, sampling=None): super().__init__() @@ -138,7 +134,7 @@ def get(self, idx): help="preprocess only (no training)", ) parser.add_argument("--mae", action="store_true", help="do mae calculation") - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem", action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -148,14 +144,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", ) @@ -166,7 +162,7 @@ def get(self, idx): graph_feature_names = ["frequencies", "intensities"] graph_feature_dim = [50, 50] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datafile = os.path.join(dirpwd, "datasets/dftb_aisd_electronic_excitation_spectrum") + datafile = os.path.join(dirpwd, "dataset/dftb_aisd_electronic_excitation_spectrum") ################################################################################################################## input_filename = os.path.join(dirpwd, "dftb_discrete_uv_spectrum.json") ################################################################################################################## @@ -227,7 +223,7 @@ def get(self, idx): config["pna_deg"] = deg ## adios - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) adwriter.add("valset", valset) @@ -239,7 +235,7 @@ def get(self, idx): ## pickle basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -283,14 +279,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" %
modelname ) trainset = SimplePickleDataset(basedir, "trainset") valset = SimplePickleDataset(basedir, "valset") From 2f819ebf728ee9101ac1365a2a1ece87c422daac Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 13:13:13 -0400 Subject: [PATCH 15/28] reverting inadvertent automated refactoring of dataset folder into datasets --- .../train_smooth_uv_spectrum.py | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py index fe721822e..3e8d06dcd 100644 --- a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py +++ b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py @@ -5,32 +5,27 @@ import os, json import random -import pickle import logging import sys -from tqdm import tqdm from mpi4py import MPI -from itertools import chain import argparse -import time from rdkit.Chem.rdmolfiles import MolFromPDBFile import hydragnn -from hydragnn.utils.print_utils import print_distributed, iterate_tqdm, log -from hydragnn.utils.time_utils import Timer +from hydragnn.utils.print.print_utils import print_distributed, iterate_tqdm, log +from hydragnn.utils.profiling_and_tracing.time_utils import Timer # from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset -from hydragnn.utils.pickledataset import SimplePickleDataset -from hydragnn.utils.smiles_utils import ( +from hydragnn.utils.descriptors_and_embeddings.smiles_utils import ( get_node_attribute_name, generate_graphdata_from_rdkit_molecule, ) from hydragnn.utils.distributed import get_device from hydragnn.preprocess.load_data import split_dataset -from hydragnn.utils.distdataset import DistDataset -from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset +from hydragnn.utils.datasets.distdataset import DistDataset +from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset +from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg import numpy as np @@ -40,12 +35,11 @@ except ImportError: pass -import torch_geometric.data import torch import torch.distributed as dist -from hydragnn.utils import nsplit -import hydragnn.utils.tracer as tr +from hydragnn.utils.distributed import nsplit +import hydragnn.utils.profiling_and_tracing.tracer as tr # FIXME: this works fine for now because we train on GDB-9 molecules # for larger chemical spaces, the following atom representation has to be properly expanded @@ -56,7 +50,7 @@ def info(*args, logtype="info", sep=" "): getattr(logging, logtype)(sep.join(map(str, args))) -from hydragnn.utils.abstractbasedataset import AbstractBaseDataset +from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset def dftb_to_graph(moldir, dftb_node_types, var_config): @@ -75,7 +69,7 @@ def dftb_to_graph(moldir, dftb_node_types, var_config): class DFTBDataset(AbstractBaseDataset): - """DFTBDataset datasets class""" + """DFTBDataset dataset class""" def __init__(self, dirpath, dftb_node_types, var_config, dist=False, sampling=None): super().__init__() @@ -138,7 +132,7 @@ def get(self, idx): help="preprocess only (no training)", ) parser.add_argument("--mae", action="store_true", help="do mae calculation") - parser.add_argument("--ddstore", action="store_true", help="ddstore datasets") + parser.add_argument("--ddstore", action="store_true", help="ddstore dataset") parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None) parser.add_argument("--shmem",
action="store_true", help="shmem") parser.add_argument("--log", help="log name") @@ -148,14 +142,14 @@ def get(self, idx): group = parser.add_mutually_exclusive_group() group.add_argument( "--adios", - help="Adios datasets", + help="Adios dataset", action="store_const", dest="format", const="adios", ) group.add_argument( "--pickle", - help="Pickle datasets", + help="Pickle dataset", action="store_const", dest="format", const="pickle", @@ -166,7 +160,7 @@ def get(self, idx): graph_feature_names = ["spectrum"] graph_feature_dim = [37500] dirpwd = os.path.dirname(os.path.abspath(__file__)) - datafile = os.path.join(dirpwd, "datasets/dftb_aisd_electronic_excitation_spectrum") + datafile = os.path.join(dirpwd, "dataset/dftb_aisd_electronic_excitation_spectrum") ################################################################################################################## input_filename = os.path.join(dirpwd, "dftb_smooth_uv_spectrum.json") ################################################################################################################## @@ -227,7 +221,7 @@ def get(self, idx): config["pna_deg"] = deg ## adios - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) adwriter = AdiosWriter(fname, comm) adwriter.add("trainset", trainset) adwriter.add("valset", valset) @@ -239,7 +233,7 @@ def get(self, idx): ## pickle basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) attrs = dict() attrs["pna_deg"] = deg @@ -283,14 +277,14 @@ def get(self, idx): "ddstore": args.ddstore, "ddstore_width": args.ddstore_width, } - fname = os.path.join(os.path.dirname(__file__), "./datasets/%s.bp" % modelname) + fname = os.path.join(os.path.dirname(__file__), "./dataset/%s.bp" % modelname) trainset = AdiosDataset(fname, "trainset", comm, **opt) valset = AdiosDataset(fname, "valset", comm, **opt) testset = AdiosDataset(fname, "testset", comm, **opt) elif args.format == "pickle": info("Pickle load") basedir = os.path.join( - os.path.dirname(__file__), "datasets", "%s.pickle" % modelname + os.path.dirname(__file__), "dataset", "%s.pickle" % modelname ) trainset = SimplePickleDataset(basedir, "trainset") valset = SimplePickleDataset(basedir, "valset") From c50f93efb20f58e0bf918924e981861a99472296 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 13:14:27 -0400 Subject: [PATCH 16/28] reverting inadvertent automated refactoring of dataset forlder into datasets --- examples/ising_model/train_ising.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py index c2279a627..01cdbcd07 100644 --- a/examples/ising_model/train_ising.py +++ b/examples/ising_model/train_ising.py @@ -226,7 +226,7 @@ def info(*args, logtype="info", sep=" "): """ Parallel ising data generation step: 1. Generate ising data (*.txt) in parallel (create_dataset_mpi) - 2. Read raw datasets in parallel (*.txt) (RawDataset) + 2. Read raw dataset in parallel (*.txt) (RawDataset) 3. Split into a train, valid, and test set (split_dataset) 4. 
Save as Adios file in parallel """ From 2ec962fc0d5bae77644a0e7f5858dbf8d1d8da84 Mon Sep 17 00:00:00 2001 From: allaffa Date: Fri, 23 Aug 2024 14:21:17 -0400 Subject: [PATCH 17/28] git formatting fixed --- examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py | 5 ++++- examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py index 11fbefac3..0f6484911 100644 --- a/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py +++ b/examples/dftb_uv_spectrum/train_discrete_uv_spectrum.py @@ -23,7 +23,10 @@ from hydragnn.utils.distributed import get_device from hydragnn.preprocess.load_data import split_dataset from hydragnn.utils.datasets.distdataset import DistDataset -from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset +from hydragnn.utils.datasets.pickledataset import ( + SimplePickleWriter, + SimplePickleDataset, +) from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg import numpy as np diff --git a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py index 3e8d06dcd..4233d9c05 100644 --- a/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py +++ b/examples/dftb_uv_spectrum/train_smooth_uv_spectrum.py @@ -25,7 +25,10 @@ from hydragnn.utils.distributed import get_device from hydragnn.preprocess.load_data import split_dataset from hydragnn.utils.datasets.distdataset import DistDataset -from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset +from hydragnn.utils.datasets.pickledataset import ( + SimplePickleWriter, + SimplePickleDataset, +) from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg import numpy as np From bcd91ff286ecdcfae6a4da696296754b345779f2 Mon Sep 17 00:00:00 2001 From: allaffa Date: Sat, 24 Aug 2024 11:39:36 -0400 Subject: [PATCH 18/28] Adagrad converted to Adamax --- hydragnn/utils/optimizer/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydragnn/utils/optimizer/optimizer.py b/hydragnn/utils/optimizer/optimizer.py index af2fdcc32..c4f6e8bef 100644 --- a/hydragnn/utils/optimizer/optimizer.py +++ b/hydragnn/utils/optimizer/optimizer.py @@ -70,7 +70,7 @@ def select_zero_redundancy_optimizer(model, config): elif config["type"] == "Adamax": optimizer = ZeroRedundancyOptimizer( model.parameters(), - optimizer_class=torch.optim.Adagrad, + optimizer_class=torch.optim.Adamax, lr=config["learning_rate"], ) elif config["type"] == "AdamW": From a0c932aabcbd105f4fe792ab6c42ca9c1519b4da Mon Sep 17 00:00:00 2001 From: allaffa Date: Tue, 3 Sep 2024 09:16:35 -0400 Subject: [PATCH 19/28] Additional changes to fix bugs and address suggestions from Erdem --- examples/ising_model/train_ising.py | 2 +- examples/multidataset/energy_linear_regression.py | 11 ++++++++--- examples/ogb/train_gap.py | 2 +- examples/qm7x/train.py | 2 +- hydragnn/preprocess/stratified_sampling.py | 2 +- {utils => hydragnn/utils}/lsms/__init__.py | 0 .../utils}/lsms/compositional_histogram_cutoff.py | 0 .../lsms/convert_total_energy_to_formation_gibbs.py | 0 tests/test_enthalpy.py | 4 ++-- utils/__init__.py | 1 - 10 files changed, 14 insertions(+), 10 deletions(-) rename {utils => hydragnn/utils}/lsms/__init__.py (100%) rename {utils => hydragnn/utils}/lsms/compositional_histogram_cutoff.py (100%) rename {utils =>
hydragnn/utils}/lsms/convert_total_energy_to_formation_gibbs.py (100%)
 delete mode 100644 utils/__init__.py

diff --git a/examples/ising_model/train_ising.py b/examples/ising_model/train_ising.py
index 01cdbcd07..3b8c859bd 100644
--- a/examples/ising_model/train_ising.py
+++ b/examples/ising_model/train_ising.py
@@ -27,7 +27,7 @@
 import numpy as np
 
 try:
-    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+    from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
 except ImportError:
     pass
 
diff --git a/examples/multidataset/energy_linear_regression.py b/examples/multidataset/energy_linear_regression.py
index 73a15ed3c..0b81c578b 100644
--- a/examples/multidataset/energy_linear_regression.py
+++ b/examples/multidataset/energy_linear_regression.py
@@ -7,10 +7,15 @@
 import numpy as np
 
 import hydragnn
-from hydragnn.utils import nsplit
-from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+from hydragnn.utils.distributed import nsplit
+from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
 from tqdm import tqdm
-from mpi_list import Context, DFM
+
+# This import requires the optional package mpi_list
+try:
+    from mpi_list import Context, DFM
+except ImportError:
+    print("mpi_list is required; see https://github.com/frobnitzem/mpi_list")
 
 
 def subset(i):
diff --git a/examples/ogb/train_gap.py b/examples/ogb/train_gap.py
index 54f76e21a..5b54763b3 100644
--- a/examples/ogb/train_gap.py
+++ b/examples/ogb/train_gap.py
@@ -32,7 +32,7 @@
 import numpy as np
 
 try:
-    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+    from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
 except ImportError:
     pass
 
diff --git a/examples/qm7x/train.py b/examples/qm7x/train.py
index 2a72c815f..e8e9f0334 100644
--- a/examples/qm7x/train.py
+++ b/examples/qm7x/train.py
@@ -68,7 +68,7 @@ def info(*args, logtype="info", sep=" "):
     getattr(logging, logtype)(sep.join(map(str, args)))
 
 
-from hydragnn.utils.datasets import AbstractBaseDataset
+from hydragnn.utils.datasets.abstractbasedataset import AbstractBaseDataset
 
 # FIXME: this radis cutoff overwrites the radius cutoff currently written in the JSON file
 create_graph_fromXYZ = RadiusGraph(r=5.0)  # radius cutoff in angstrom
diff --git a/hydragnn/preprocess/stratified_sampling.py b/hydragnn/preprocess/stratified_sampling.py
index 3072ff4d5..ebc68b2ed 100644
--- a/hydragnn/preprocess/stratified_sampling.py
+++ b/hydragnn/preprocess/stratified_sampling.py
@@ -39,7 +39,7 @@ def stratified_sampling(dataset: [Data], subsample_percentage: float, verbosity=
         n_splits=1, train_size=subsample_percentage, random_state=0
     )
 
-    for subsample_index, rest_of_data_index in sss.split(dataset, dataset_categories):
+    for subsample_index, _ in sss.split(dataset, dataset_categories):
         subsample_indices = subsample_index.tolist()
 
         for index in subsample_indices:
diff --git a/utils/lsms/__init__.py b/hydragnn/utils/lsms/__init__.py
similarity index 100%
rename from utils/lsms/__init__.py
rename to hydragnn/utils/lsms/__init__.py
diff --git a/utils/lsms/compositional_histogram_cutoff.py b/hydragnn/utils/lsms/compositional_histogram_cutoff.py
similarity index 100%
rename from utils/lsms/compositional_histogram_cutoff.py
rename to hydragnn/utils/lsms/compositional_histogram_cutoff.py
diff --git a/utils/lsms/convert_total_energy_to_formation_gibbs.py b/hydragnn/utils/lsms/convert_total_energy_to_formation_gibbs.py
similarity index 100%
rename from utils/lsms/convert_total_energy_to_formation_gibbs.py
rename to hydragnn/utils/lsms/convert_total_energy_to_formation_gibbs.py
diff --git a/tests/test_enthalpy.py b/tests/test_enthalpy.py
index 4fd7ac04c..45a98293c 100644
--- a/tests/test_enthalpy.py
+++ b/tests/test_enthalpy.py
@@ -11,9 +11,9 @@
 import os
 
 import numpy as np
-import hydragnn, tests
+import tests
 import pytest
-from utils.lsms import (
+from hydragnn.utils.lsms import (
     convert_raw_data_energy_to_gibbs,
 )
diff --git a/utils/__init__.py b/utils/__init__.py
deleted file mode 100644
index 69448aef7..000000000
--- a/utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import lsms

From 9322f7a3ede4359aa2cd9ea13af9022816619993 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 22:16:45 -0400
Subject: [PATCH 20/28] imports fixed for LennardJones example

---
 examples/LennardJones/LJ_inference_plots.py |  8 ++++----
 examples/LennardJones/LennardJones.py       | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/LennardJones/LJ_inference_plots.py b/examples/LennardJones/LJ_inference_plots.py
index 324da425f..9f5d00cac 100644
--- a/examples/LennardJones/LJ_inference_plots.py
+++ b/examples/LennardJones/LJ_inference_plots.py
@@ -22,11 +22,11 @@
 import numpy as np
 
 import hydragnn
-from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
 from hydragnn.utils.distributed import get_device
 from hydragnn.utils.model import load_existing_model
-from hydragnn.utils.pickledataset import SimplePickleDataset
-from hydragnn.utils.config_utils import (
+from hydragnn.utils.datasets.pickledataset import SimplePickleDataset
+from hydragnn.utils.input_config_parsing.config_utils import (
     update_config,
 )
 from hydragnn.models.create import create_model_config
@@ -35,7 +35,7 @@
 from scipy.interpolate import griddata
 
 try:
-    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+    from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
 except ImportError:
     pass
 
diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index 045b1d251..37d805430 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -30,16 +30,16 @@
 
 # HydraGNN
 import hydragnn
-from hydragnn.utils.print_utils import log
-from hydragnn.utils.time_utils import Timer
-import hydragnn.utils.tracer as tr
+from hydragnn.utils.print.print_utils import log
+from hydragnn.utils.profiling_and_tracing.time_utils import Timer
+import hydragnn.utils.profiling_and_tracing.tracer as tr
 from hydragnn.preprocess.load_data import split_dataset
-from hydragnn.utils.distdataset import DistDataset
-from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
-from hydragnn.preprocess.utils import gather_deg
+from hydragnn.utils.datasets.distdataset import DistDataset
+from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset
+from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 
 try:
-    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+    from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
 except ImportError:
     pass

From f86560e3719da0886334f50fce5b06911c8acca9 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 22:18:36 -0400
Subject: [PATCH 21/28] formatting fixed

---
 examples/LennardJones/LennardJones.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index 37d805430..230e4f24c 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -35,7 +35,10 @@
 import hydragnn.utils.profiling_and_tracing.tracer as tr
 from hydragnn.preprocess.load_data import split_dataset
 from hydragnn.utils.datasets.distdataset import DistDataset
-from hydragnn.utils.datasets.pickledataset import SimplePickleWriter, SimplePickleDataset
+from hydragnn.utils.datasets.pickledataset import (
+    SimplePickleWriter,
+    SimplePickleDataset,
+)
 from hydragnn.preprocess.graph_samples_checks_and_updates import gather_deg
 
 try:

From 66ad33e439dff5dd6e2bb3aebded4721dac9cb01 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 22:23:43 -0400
Subject: [PATCH 22/28] imports in LJ_data.py fixed

---
 examples/LennardJones/LJ_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/LennardJones/LJ_data.py b/examples/LennardJones/LJ_data.py
index 6226ff6f8..49751df51 100644
--- a/examples/LennardJones/LJ_data.py
+++ b/examples/LennardJones/LJ_data.py
@@ -32,8 +32,8 @@
 mpi4py.rc.threads = False
 
 # HydraGNN
-from hydragnn.utils.abstractrawdataset import AbstractBaseDataset
-from hydragnn.utils import nsplit
+from hydragnn.utils.datasets.abstractrawdataset import AbstractBaseDataset
+from hydragnn.utils.distributed import nsplit
 from hydragnn.preprocess.utils import get_radius_graph_pbc
 
 # Angstrom unit

From 0e5e71fbdb6dd9f82b595ac87c4c3f19dc345e7b Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 22:44:18 -0400
Subject: [PATCH 23/28] import of graph utils fixed in LJ_data.py

---
 examples/LennardJones/LJ_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/LennardJones/LJ_data.py b/examples/LennardJones/LJ_data.py
index 49751df51..e3d9249da 100644
--- a/examples/LennardJones/LJ_data.py
+++ b/examples/LennardJones/LJ_data.py
@@ -34,7 +34,7 @@
 # HydraGNN
 from hydragnn.utils.datasets.abstractrawdataset import AbstractBaseDataset
 from hydragnn.utils.distributed import nsplit
-from hydragnn.preprocess.utils import get_radius_graph_pbc
+from hydragnn.preprocess.graph_samples_checks_and_updates import get_radius_graph_pbc
 
 # Angstrom unit
 primitive_bravais_lattice_constant_x = 3.8

From 2b01472049d0e6465a5a591a87ec47578904bd4d Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 23:03:34 -0400
Subject: [PATCH 24/28] import of setup_ddp() fixed in LennardJones

---
 examples/LennardJones/LennardJones.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index 230e4f24c..c00efd5ae 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -120,7 +120,7 @@
 
     ##################################################################################################################
    # Always initialize for multi-rank training.
-    comm_size, rank = hydragnn.utils.setup_ddp()
+    comm_size, rank = hydragnn.utils.distributed.setup_ddp()
     ##################################################################################################################
 
     comm = MPI.COMM_WORLD

From 2bf8a85f5b239dbe21f5944a04bcb684ac107898 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 23:10:01 -0400
Subject: [PATCH 25/28] setup_log call fixed

---
 examples/LennardJones/LennardJones.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index c00efd5ae..7075e81bd 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -133,7 +133,7 @@
     )
 
     log_name = "LJ" if args.log is None else args.log
-    hydragnn.utils.setup_log(log_name)
+    hydragnn.utils.print.setup_log(log_name)
     writer = hydragnn.utils.get_summary_writer(log_name)
 
     log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0)

From 40738cd60e0fddddc65db65555165aaab56b0895 Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 23:11:22 -0400
Subject: [PATCH 26/28] get_summary_writer call fixed

---
 examples/LennardJones/LennardJones.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index 7075e81bd..f187ea5da 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -134,7 +134,7 @@
 
     log_name = "LJ" if args.log is None else args.log
     hydragnn.utils.print.setup_log(log_name)
-    writer = hydragnn.utils.get_summary_writer(log_name)
+    writer = hydragnn.utils.model.get_summary_writer(log_name)
 
     log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0)

From b93798a33abc291f73ecc15675e070abbb69255f Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 23:15:50 -0400
Subject: [PATCH 27/28] additional calls fixed

---
 examples/LennardJones/LennardJones.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index f187ea5da..eea808e9d 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -269,11 +269,11 @@
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
 
-    config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader)
+    config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader)
 
     ## Good to sync with everyone right after DDStore setup
     comm.Barrier()
 
-    hydragnn.utils.save_config(config, log_name)
+    hydragnn.utils.input_config_parsing.save_config(config, log_name)
 
     timer.stop()
@@ -281,7 +281,7 @@
         config=config["NeuralNetwork"],
         verbosity=verbosity,
    )
-    model = hydragnn.utils.get_distributed_model(model, verbosity)
+    model = hydragnn.utils.distributed.get_distributed_model(model, verbosity)
 
     learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"]
     optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -289,7 +289,7 @@
         optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001
     )
 
-    hydragnn.utils.load_existing_model_config(
+    hydragnn.utils.model.load_existing_model_config(
         model, config["NeuralNetwork"]["Training"], optimizer=optimizer
     )
@@ -310,8 +310,8 @@
         compute_grad_energy=True,
     )
 
-    hydragnn.utils.save_model(model, optimizer, log_name)
-    hydragnn.utils.print_timers(verbosity)
+    hydragnn.utils.model.save_model(model, optimizer, log_name)
+    hydragnn.utils.profiling_and_tracing.print_timers(verbosity)
 
     if tr.has("GPTLTracer"):
         import gptl4py as gp

From f0fa74c2882bd30e81b0bd17f4b7d13b802bc28e Mon Sep 17 00:00:00 2001
From: allaffa
Date: Thu, 19 Sep 2024 23:28:09 -0400
Subject: [PATCH 28/28] black formatting fixed

---
 examples/LennardJones/LennardJones.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
index eea808e9d..ecaa06629 100644
--- a/examples/LennardJones/LennardJones.py
+++ b/examples/LennardJones/LennardJones.py
@@ -269,7 +269,9 @@
         trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"]
     )
 
-    config = hydragnn.utils.input_config_parsing.update_config(config, train_loader, val_loader, test_loader)
+    config = hydragnn.utils.input_config_parsing.update_config(
+        config, train_loader, val_loader, test_loader
+    )
 
     ## Good to sync with everyone right after DDStore setup
     comm.Barrier()
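
Summary of the import migration in this series (illustrative, not part of any patch): patches 20 through 28 finish moving the examples off the old flat hydragnn.utils namespace and onto the topical subpackages introduced earlier in the series. The sketch below shows the before/after mapping using only module paths that appear on "+" lines in the diffs above; it assumes the new subpackage __init__ files re-export these names, as the attribute-style calls such as hydragnn.utils.distributed.setup_ddp() suggest. The try/except guard mirrors the pattern the series applies to the optional ADIOS and mpi_list dependencies.

    # Old flat imports, as they appear on "-" lines in this series:
    #   from hydragnn.utils import nsplit
    #   from hydragnn.utils.time_utils import Timer
    #   from hydragnn.utils.pickledataset import SimplePickleDataset
    #   from hydragnn.preprocess.utils import get_radius_graph_pbc

    # New namespaced imports, as they appear on "+" lines in this series:
    from hydragnn.utils.distributed import nsplit, setup_ddp
    from hydragnn.utils.profiling_and_tracing.time_utils import Timer
    from hydragnn.utils.datasets.pickledataset import SimplePickleDataset
    from hydragnn.preprocess.graph_samples_checks_and_updates import get_radius_graph_pbc

    # Optional I/O backends stay guarded, so scripts still import cleanly
    # on installations without ADIOS:
    try:
        from hydragnn.utils.datasets.adiosdataset import AdiosWriter, AdiosDataset
    except ImportError:
        pass

In short: dataset classes now live under hydragnn.utils.datasets, parallelism helpers under hydragnn.utils.distributed, timing and tracing under hydragnn.utils.profiling_and_tracing, config handling under hydragnn.utils.input_config_parsing, and the former hydragnn.preprocess.utils graph helpers under hydragnn.preprocess.graph_samples_checks_and_updates.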