
Commit

save space when converting hf model to megatron model. (#25950)
* fix convert megatron model too large

* fix convert megatron model too large
flower-with-safe authored Sep 5, 2023
1 parent b8def68 commit 172f42c
Showing 1 changed file with 3 additions and 3 deletions.
@@ -737,7 +737,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
         word_emb_dict = get_element_from_dict_by_path(
             output_state_dict[i], "model.language_model.embedding.word_embeddings"
         )
-        word_emb_dict["weight"] = out_word_embed[i]
+        word_emb_dict["weight"] = out_word_embed[i].clone()
 
     # Transformer layers
     print("converting transformer layers")
@@ -845,7 +845,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
                 for i in range(args.target_tensor_model_parallel_size):
                     params_dict = get_element_from_dict_by_path(output_state_dict[i], "model.language_model.encoder")
                     params_dict[layer_name] = (
-                        params[i] if (op_name + "." + weight_or_bias in tensor_parallel_params) else params
+                        params[i].clone() if (op_name + "." + weight_or_bias in tensor_parallel_params) else params
                     )
 
         if pp_rank == args.target_pipeline_model_parallel_size - 1:
@@ -860,7 +860,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
             # add the LM head
             for i in range(args.target_tensor_model_parallel_size):
                 params_dict = get_element_from_dict_by_path(output_state_dict[i], "model.word_embeddings_for_head")
-                params_dict["weight"] = out_word_embed[i]
+                params_dict["weight"] = out_word_embed[i].clone()
 
     # saving the state dict as per the tp_rank and pp_rank
     for tp_rank in range(args.target_tensor_model_parallel_size):
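
Why .clone() saves space: torch.save serializes a tensor's entire underlying storage, so saving a view or slice of a large tensor (for example, a tensor-parallel shard obtained with torch.chunk) writes the full un-sharded storage to disk for every rank. Calling .clone() first gives each shard its own storage of exactly the sliced size before it goes into the per-rank state dict. A minimal sketch of the effect, with illustrative shapes and file names (not taken from the conversion script):

import os
import torch

# A large "full" weight matrix and one tensor-parallel shard of it.
full_weight = torch.randn(50304, 4096)          # 50304 * 4096 * 4 bytes, roughly 0.8 GB
shard = torch.chunk(full_weight, 8, dim=0)[0]   # a view sharing full_weight's storage

torch.save({"weight": shard}, "shard_view.pt")           # writes the whole shared storage
torch.save({"weight": shard.clone()}, "shard_clone.pt")  # writes only the shard's own data

print(os.path.getsize("shard_view.pt") >> 20, "MiB")     # about the size of the full matrix
print(os.path.getsize("shard_clone.pt") >> 20, "MiB")    # about one eighth of that

In the hunks above, out_word_embed[i] and params[i] are per-rank slices of full tensors, so without the clone each tensor-parallel rank's checkpoint would otherwise carry the full storage.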
