[GPT-3] Fix shared weights sync for PipelineLayer (#7775)
DrownFish19 authored Jan 4, 2024
1 parent fe3c052 commit 4952069
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion paddlenlp/transformers/model_utils.py
@@ -42,7 +42,10 @@
 )
 from huggingface_hub.utils import EntryNotFoundError
 from paddle import Tensor
-from paddle.distributed.fleet.meta_parallel.parallel_layers import SharedLayerDesc
+from paddle.distributed.fleet.meta_parallel.parallel_layers import (
+    PipelineLayer,
+    SharedLayerDesc,
+)
 from paddle.nn import Embedding, Layer
 
 # TODO(fangzeyang) Temporary fix and replace by paddle framework downloader later
@@ -935,6 +938,18 @@ def _post_init(self, original_init, *args, **kwargs):
         ):
             self.init_weights()
 
+        # Note:
+        # 1. PipelineLayer creates the parameters for each layer and calls
+        #    `_synchronize_shared_weights()` to synchronize the shared parameters.
+        # 2. When the model `state_dict` is set, `_synchronize_shared_weights()` is called again
+        #    to synchronize the shared parameters.
+        # However, `self._init_weights` re-initializes the parameters without synchronizing the
+        # shared ones, so if no checkpoint is loaded afterwards, the shared parameters across
+        # pipeline stages end up different.
+
+        if isinstance(self, PipelineLayer):
+            self._synchronize_shared_weights()
+
     def _init_weights(self, layer):
         """
         Initialize the weights. This method should be overridden by derived class.
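For context, here is a minimal, self-contained sketch of why the extra call is needed. It is not part of this commit, and `ToyStage` and `synchronize_shared_weights` are hypothetical stand-ins for PipelineLayer's per-stage parameters and `_synchronize_shared_weights()`: with pipeline parallelism, a shared weight such as tied embeddings exists as a separate copy on each stage and is kept identical by a broadcast-style sync, so re-initializing the parameters on each stage independently breaks that tie unless the sync is repeated afterwards.

# toy_shared_weight_sync.py -- illustrative only; not Paddle's implementation.
import numpy as np

rng = np.random.default_rng(0)

class ToyStage:
    """One pipeline stage holding its own copy of a tied weight."""

    def __init__(self):
        self.shared_weight = rng.normal(size=(4,))  # created per stage

    def init_weights(self):
        # Mimics `_init_weights`: re-draws the parameter locally, with no sync.
        self.shared_weight = rng.normal(size=(4,))

def synchronize_shared_weights(stages):
    # Mimics `_synchronize_shared_weights()`: broadcast the first stage's copy.
    src = stages[0].shared_weight
    for stage in stages[1:]:
        stage.shared_weight = src.copy()

stages = [ToyStage(), ToyStage()]
synchronize_shared_weights(stages)   # copies now match (PipelineLayer does this at construction)

for stage in stages:
    stage.init_weights()             # post-init re-draws each copy independently

print(np.allclose(stages[0].shared_weight, stages[1].shared_weight))  # False: the tie is broken

synchronize_shared_weights(stages)   # the commit's fix: sync again right after init
print(np.allclose(stages[0].shared_weight, stages[1].shared_weight))  # True

This mirrors the ordering in `_post_init` above: `init_weights()` runs first, then, for `PipelineLayer` subclasses, `_synchronize_shared_weights()` restores the shared parameters before any checkpoint is (or is not) loaded.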
