diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index d1ba54213a0346..9f8e3cd19cd835 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -1569,6 +1569,7 @@ class BigBirdPegasusPreTrainedModel(PreTrainedModel): supports_gradient_checkpointing = True _no_split_modules = ["BigBirdPegasusEncoderLayer", "BigBirdPegasusDecoderLayer"] _skip_keys_device_placement = "past_key_values" + _supports_param_buffer_assignment = False def _init_weights(self, module): std = self.config.init_std