From 148b5745a57a6059231178c41859ecb09164c157 Mon Sep 17 00:00:00 2001
From: orangetin
Date: Wed, 22 Mar 2023 18:15:36 -0500
Subject: [PATCH] Add argument to offload model from memory to disk

---
 environment.yml                    |  1 +
 pretrained/GPT-NeoX-20B/prepare.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 2c80dc1..a94df4c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -20,6 +20,7 @@ dependencies:
   - torchaudio=0.13.1
   - torchvision=0.14.1
   - pip:
+    - accelerate==0.17.1
     - datasets==2.10.1
     - loguru==0.6.0
     - netifaces==0.11.0
diff --git a/pretrained/GPT-NeoX-20B/prepare.py b/pretrained/GPT-NeoX-20B/prepare.py
index c30cd77..d30381d 100644
--- a/pretrained/GPT-NeoX-20B/prepare.py
+++ b/pretrained/GPT-NeoX-20B/prepare.py
@@ -12,6 +12,8 @@
                         help='model-name')
     parser.add_argument('--save-dir', type=str, default=DIR,
                         help='model-name')
+    parser.add_argument('--offload-dir', type=str, default=None,
+                        help='directory to offload from memory')
     args = parser.parse_args()
 
     if not os.path.exists(args.save_dir):
@@ -24,7 +26,14 @@
     config.save_pretrained(save_path)
     tokenizer = AutoTokenizer.from_pretrained(args.model_name)
     tokenizer.save_pretrained(save_path)
-    model = AutoModelForCausalLM.from_pretrained(args.model_name, torch_dtype=torch.float16)
+
+    # offload model from memory to disk if offload-dir is specified
+    if args.offload_dir is not None:
+        if not os.path.exists(args.offload_dir):
+            os.mkdir(args.offload_dir)
+        model = AutoModelForCausalLM.from_pretrained(args.model_name, torch_dtype=torch.float16, device_map="auto", offload_folder=args.offload_dir)
+    else:
+        model = AutoModelForCausalLM.from_pretrained(args.model_name, torch_dtype=torch.float16)
 
     item = {}
     item['embed_in.weight'] = model.gpt_neox.embed_in.weight
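
Note (not part of the patch): a minimal standalone sketch of the disk-offload path this change wires up. It assumes `accelerate` is installed (added to environment.yml above); the model name and scratch directory below are illustrative, where the patched script takes them from --model-name and --offload-dir.

import os
import torch
from transformers import AutoModelForCausalLM

# Illustrative values; in prepare.py these come from the CLI arguments.
model_name = "EleutherAI/gpt-neox-20b"
offload_dir = "./offload-scratch"   # hypothetical scratch directory on disk

os.makedirs(offload_dir, exist_ok=True)

# With device_map="auto", accelerate spreads the fp16 weights across the
# available GPU(s) and CPU RAM, and spills whatever does not fit into
# offload_folder on disk instead of failing with an out-of-memory error.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder=offload_dir,
)

With the patch applied, an equivalent invocation would be something like `python pretrained/GPT-NeoX-20B/prepare.py --offload-dir ./offload-scratch` (path assumed); omitting --offload-dir keeps the original behaviour of loading the full model into memory.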