From 7695845e48cc46acf55dfb0fb9eb5449917f4766 Mon Sep 17 00:00:00 2001
From: Caglar Demir
Date: Thu, 14 Mar 2024 16:11:27 +0100
Subject: [PATCH] Working version of continual learning

---
 README.md                     |  7 ++---
 dicee/config.py               |  2 ++
 dicee/evaluator.py            |  2 +-
 dicee/executer.py             | 31 +++++++++++-----------
 dicee/models/base_model.py    |  2 ++
 dicee/scripts/run.py          | 48 ++++++++++++++++++++++-------------
 dicee/trainer/dice_trainer.py |  2 +-
 tests/test_regression_cl.py   |  2 +-
 8 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 8a1bd828..8fbe9fe2 100644
--- a/README.md
+++ b/README.md
@@ -95,15 +95,16 @@ A KGE model can also be trained from the command line
 ```bash
 dicee --dataset_dir "KGs/UMLS" --model Keci --eval_model "train_val_test"
 ```
-dicee automaticaly detects available GPUs and trains a model with distributed data parallels technique. Under the hood, dicee uses lighning as a default trainer.
+dicee automatically detects available GPUs and trains a model with the distributed data parallel technique.
 ```bash
 # Train a model by only using the GPU-0
 CUDA_VISIBLE_DEVICES=0 dicee --dataset_dir "KGs/UMLS" --model Keci --eval_model "train_val_test"
 # Train a model by only using GPU-1
 CUDA_VISIBLE_DEVICES=1 dicee --dataset_dir "KGs/UMLS" --model Keci --eval_model "train_val_test"
-NCCL_P2P_DISABLE=1 CUDA_VISIBLE_DEVICES=0,1 python dicee/scripts/run.py --trainer PL --dataset_dir "KGs/UMLS" --model Keci --eval_model "train_val_test"
+# Train a model by using all available GPUs
+dicee --dataset_dir "KGs/UMLS" --model Keci --eval_model "train_val_test"
 ```
-Under the hood, dicee executes run.py script and uses lighning as a default trainer
+Under the hood, dicee executes the run.py script and uses [lightning](https://lightning.ai/) as the default trainer.
 ```bash
 # Two equivalent executions
 # (1)
diff --git a/dicee/config.py b/dicee/config.py
index 5ba66e14..3d7921cc 100644
--- a/dicee/config.py
+++ b/dicee/config.py
@@ -133,6 +133,8 @@ def __init__(self, **kwargs):
         self.block_size: int = None
         "block size of LLM"
 
+        self.continual_learning = None
+        "Path of a pretrained model for continual training"
 
     def __iter__(self):
         # Iterate
diff --git a/dicee/evaluator.py b/dicee/evaluator.py
index fdee4f7b..ab46393a 100644
--- a/dicee/evaluator.py
+++ b/dicee/evaluator.py
@@ -456,7 +456,7 @@ def dummy_eval(self, trained_model, form_of_labelling: str):
                                   valid_set=valid_set,
                                   test_set=test_set,
                                   trained_model=trained_model)
-        elif self.args.scoring_technique in ['KvsAll', 'KvsSample', '1vsAll', 'PvsAll', 'CCvsAll']:
+        elif self.args.scoring_technique in ["AllvsAll", 'KvsAll', 'KvsSample', '1vsAll']:
             self.eval_with_vs_all(train_set=train_set,
                                   valid_set=valid_set,
                                   test_set=test_set,
diff --git a/dicee/executer.py b/dicee/executer.py
index 4399af63..7be7fbef 100644
--- a/dicee/executer.py
+++ b/dicee/executer.py
@@ -234,31 +234,32 @@ class ContinuousExecute(Execute):
     (1) Loading & Preprocessing & Serializing input data.
     (2) Training & Validation & Testing
     (3) Storing all necessary info
+
+    During continual learning, only the ***num_epochs*** parameter can be modified.
+    The trained model is stored in the same folder as the seed model.
+    The trained model is annotated with the current time.
""" def __init__(self, args): - assert os.path.exists(args.path_experiment_folder) - assert os.path.isfile(args.path_experiment_folder + '/configuration.json') - # (1) Load Previous input configuration - previous_args = load_json(args.path_experiment_folder + '/configuration.json') - dargs = vars(args) - del args - for k in list(dargs.keys()): - if dargs[k] is None: - del dargs[k] - # (2) Update (1) with new input - previous_args.update(dargs) + # (1) Current input configuration. + assert os.path.exists(args.continual_learning) + assert os.path.isfile(args.continual_learning + '/configuration.json') + # (2) Load previous input configuration. + previous_args = load_json(args.continual_learning + '/configuration.json') + args=vars(args) + # + previous_args["num_epochs"]=args["num_epochs"] + previous_args["continual_learning"]=args["continual_learning"] + print("Updated configuration:",previous_args) try: - report = load_json(dargs['path_experiment_folder'] + '/report.json') + report = load_json(args['continual_learning'] + '/report.json') previous_args['num_entities'] = report['num_entities'] previous_args['num_relations'] = report['num_relations'] except AssertionError: print("Couldn't find report.json.") previous_args = SimpleNamespace(**previous_args) - previous_args.full_storage_path = previous_args.path_experiment_folder print('ContinuousExecute starting...') print(previous_args) - # TODO: can we remove continuous_training from Execute ? super().__init__(previous_args, continuous_training=True) def continual_start(self) -> dict: @@ -279,7 +280,7 @@ def continual_start(self) -> dict: """ # (1) self.trainer = DICE_Trainer(args=self.args, is_continual_training=True, - storage_path=self.args.path_experiment_folder) + storage_path=self.args.continual_learning) # (2) self.trained_model, form_of_labelling = self.trainer.continual_start() diff --git a/dicee/models/base_model.py b/dicee/models/base_model.py index dbbdf9cf..c6a51890 100644 --- a/dicee/models/base_model.py +++ b/dicee/models/base_model.py @@ -431,6 +431,8 @@ class IdentityClass(torch.nn.Module): def __init__(self, args=None): super().__init__() self.args = args + def __call__(self, x): + return x @staticmethod def forward(x): diff --git a/dicee/scripts/run.py b/dicee/scripts/run.py index 7a4edcd8..ad085758 100755 --- a/dicee/scripts/run.py +++ b/dicee/scripts/run.py @@ -1,5 +1,5 @@ import json -from dicee.executer import Execute +from dicee.executer import Execute, ContinuousExecute import argparse def get_default_arguments(description=None): @@ -43,9 +43,9 @@ def get_default_arguments(description=None): parser.add_argument('--optim', type=str, default='Adam', help='An optimizer', choices=['Adam', 'AdamW', 'SGD',"NAdam", "Adagrad", "ASGD"]) - parser.add_argument('--embedding_dim', type=int, default=32, + parser.add_argument('--embedding_dim', type=int, default=256, help='Number of dimensions for an embedding vector. ') - parser.add_argument("--num_epochs", type=int, default=500, help='Number of epochs for training. ') + parser.add_argument("--num_epochs", type=int, default=100, help='Number of epochs for training. ') parser.add_argument('--batch_size', type=int, default=1024, help='Mini batch size. If None, automatic batch finder is applied') parser.add_argument("--lr", type=float, default=0.01) @@ -73,14 +73,6 @@ def get_default_arguments(description=None): parser.add_argument("--gradient_accumulation_steps", type=int, default=0, help="e.g. 
                             "implies that gradients are accumulated at every second mini-batch")
-    parser.add_argument('--num_folds_for_cv', type=int, default=0,
-                        help='Number of folds in k-fold cross validation.'
-                             'If >2 ,no evaluation scenario is applied implies no evaluation.')
-    parser.add_argument("--eval_model", type=str, default="train_val_test",
-                        choices=["None", "train", "train_val", "train_val_test", "test"],
-                        help='Evaluating link prediction performance on data splits. ')
-    parser.add_argument("--save_model_at_every_epoch", type=int, default=None,
-                        help='At every X number of epochs model will be saved. If None, we save 4 times.')
     parser.add_argument("--label_smoothing_rate", type=float, default=0.0, help='None for not using it.')
     parser.add_argument("--kernel_size", type=int, default=3,
                         help="Square kernel size for convolution based models.")
@@ -90,19 +82,34 @@ def get_default_arguments(description=None):
                         help='Number of cores to be used. 0 implies using single CPU')
     parser.add_argument("--random_seed", type=int, default=1,
                         help='Seed for all, see pl seed_everything().')
+    parser.add_argument('--p', type=int, default=0,
+                        help='P for Clifford Algebra')
+    parser.add_argument('--q', type=int, default=1,
+                        help='Q for Clifford Algebra')
+    parser.add_argument('--pykeen_model_kwargs', type=json.loads, default={})
+
+    # Evaluation Related
+    parser.add_argument('--num_folds_for_cv', type=int, default=0,
+                        help='Number of folds in k-fold cross validation. '
+                             'If >2, k-fold cross validation is applied and no other evaluation is performed.')
+    parser.add_argument("--eval_model", type=str, default="train_val_test",
+                        choices=["None", "train", "train_val", "train_val_test", "test"],
+                        help='Evaluating link prediction performance on data splits. ')
+    parser.add_argument("--save_model_at_every_epoch", type=int, default=None,
+                        help='At every X number of epochs model will be saved. If None, we save 4 times.')
+    # Continual Learning
+    parser.add_argument("--continual_learning", type=str, default=None,
+                        help="The path of a folder containing a pretrained model and configurations")
+
     parser.add_argument("--sample_triples_ratio", type=float, default=None, help='Sample input data.')
     parser.add_argument("--read_only_few", type=int, default=None,
                         help='READ only first N triples. If 0, read all.')
     parser.add_argument("--add_noise_rate", type=float, default=0.0,
                         help='Add x % of noisy triples into training dataset.')
-    parser.add_argument('--p', type=int, default=0,
-                        help='P for Clifford Algebra')
-    parser.add_argument('--q', type=int, default=1,
-                        help='Q for Clifford Algebra')
+    # WIP
+
     parser.add_argument('--r', type=int, default=0,
                         help='R for Clifford Algebra')
-    parser.add_argument('--pykeen_model_kwargs', type=json.loads, default={})
-    # WIP
     parser.add_argument('--block_size', type=int, default=8, help='Block size for BytE')
     parser.add_argument("--byte_pair_encoding",
@@ -122,7 +129,12 @@ def get_default_arguments(description=None):
     return parser.parse_args(description)
 
 def main():
-    Execute(get_default_arguments()).start()
+    args = get_default_arguments()
+    if args.continual_learning:
+        ContinuousExecute(args).continual_start()
+    else:
+        Execute(args).start()
 
 if __name__ == '__main__':
     main()
diff --git a/dicee/trainer/dice_trainer.py b/dicee/trainer/dice_trainer.py
index 34272f48..4ab3376d 100644
--- a/dicee/trainer/dice_trainer.py
+++ b/dicee/trainer/dice_trainer.py
@@ -165,7 +165,7 @@ def continual_start(self):
         self.trainer = self.initialize_trainer(callbacks=get_callbacks(self.args))
         model, form_of_labelling = self.initialize_or_load_model()
         assert form_of_labelling in ['EntityPrediction', 'RelationPrediction', 'Pyke']
-        assert self.args.scoring_technique in ['KvsSample', '1vsAll', 'KvsAll', 'NegSample']
+        assert self.args.scoring_technique in ["AllvsAll", 'KvsSample', '1vsAll', 'KvsAll', 'NegSample']
         train_loader = self.initialize_dataloader(
             reload_dataset(path=self.storage_path, form_of_labelling=form_of_labelling,
                            scoring_technique=self.args.scoring_technique,
diff --git a/tests/test_regression_cl.py b/tests/test_regression_cl.py
index 7b530a0b..3494db1c 100644
--- a/tests/test_regression_cl.py
+++ b/tests/test_regression_cl.py
@@ -25,7 +25,7 @@ def test_k_vs_all(self):
         args.init_param = 'xavier_normal'
         result = Execute(args).start()
 
-        args.path_experiment_folder = result['path_experiment_folder']
+        args.continual_learning = result['path_experiment_folder']
         cl_result = ContinuousExecute(args).continual_start()
 
         assert cl_result['Train']['H@10'] >= result['Train']['H@10']
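
A minimal end-to-end sketch of the workflow this patch enables, in the style of the README examples. The experiment folder name below is a placeholder: dicee prints the actual storage path at the end of a run, and that folder must contain the `configuration.json` (and ideally `report.json`) that `ContinuousExecute` reads.

```bash
# (1) Train a seed model; dicee serializes it together with configuration.json and report.json.
dicee --dataset_dir "KGs/UMLS" --model Keci --num_epochs 100

# (2) Resume training from the stored model. Per the updated ContinuousExecute,
# only --num_epochs is taken from the new invocation; every other setting is
# reloaded from the seed model's configuration.json.
# "Experiments/2024-03-14_16-11-27" is a hypothetical path; use the folder printed in step (1).
dicee --continual_learning "Experiments/2024-03-14_16-11-27" --num_epochs 50
```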