fix gated_linear_unit bug (NVIDIA#8042)
Signed-off-by: Agoniii <815244047@qq.com>
Co-authored-by: Chen Cui <chcui@nvidia.com>
Signed-off-by: Sasha Meister <ameister@nvidia.com>
2 people authored and sashameister committed Feb 15, 2024
1 parent 72ef654 commit bb32442
Showing 1 changed file with 2 additions and 1 deletion.
@@ -382,6 +382,7 @@ def build_transformer_config(self) -> TransformerConfig:
         add_bias_linear = self.cfg.get('bias', True)
 
         activation = self.cfg.get('activation', 'gelu')
+        gated_linear_unit = activation.endswith('glu')
         # TODO: need to check which activation functions are supported in mcore
         activation_func = activation_to_func(activation)
 
@@ -431,7 +432,7 @@ def build_transformer_config(self) -> TransformerConfig:
             'apply_residual_connection_post_layernorm': False,  # we don't use this in NeMo
             'layernorm_zero_centered_gamma': False,
             'add_bias_linear': add_bias_linear,
-            'gated_linear_unit': False,
+            'gated_linear_unit': gated_linear_unit,
             'activation_func': activation_func,
             'normalization': normalization,
             'init_method': init_method,
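For context, a minimal sketch (not part of the commit) of what the added line does, assuming NeMo-style activation names where GLU variants end in "glu". The list of names below is illustrative, not taken from the diff:

    # Hypothetical sketch: how the new line maps activation names onto the
    # gated_linear_unit flag passed to the mcore TransformerConfig.
    activations = ['gelu', 'geglu', 'swiglu', 'fast-swiglu', 'reglu', 'squared-relu']
    for name in activations:
        gated_linear_unit = name.endswith('glu')  # True only for the GLU variants
        print(f"{name:>12} -> gated_linear_unit={gated_linear_unit}")

Before this fix, 'gated_linear_unit' was hard-coded to False, so GLU activations such as 'swiglu' silently produced a non-gated MLP in the resulting TransformerConfig.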
