
Commit

linear
sijunhe committed Apr 12, 2023
1 parent 8bea1a9 commit a6150b2
Showing 4 changed files with 17 additions and 16 deletions.
1 change: 1 addition & 0 deletions paddlenlp/layers/__init__.py
@@ -18,6 +18,7 @@
     GPLinkerForEventExtraction,
     GPLinkerForRelationExtraction,
 )
+from .linear import Linear
 from .lora import *
 from .sequence import sequence_mask
 from .tcn import TCN, TemporalBlock
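
The Linear re-exported here comes from a new paddlenlp/layers/linear.py, which this diff does not show. Judging from how the model heads below use it (constructed as Linear(hidden_size, vocab_size) while still loading decoder weights saved with shape [vocab_size, hidden_size]), it is presumably a linear layer whose weight is stored transposed relative to paddle.nn.Linear and applied with transpose_y=True. A minimal sketch under that assumption; names and details are illustrative, not the actual implementation:

# --- Illustrative sketch, not part of this commit ---
import paddle
import paddle.nn as nn

class TransposedLinear(nn.Layer):
    """Linear layer whose weight is stored as [out_features, in_features],
    the transpose of paddle.nn.Linear, so checkpoints that saved the MLM
    decoder weight as [vocab_size, hidden_size] keep loading unchanged."""

    def __init__(self, in_features, out_features):
        super().__init__()
        # weight shape matches the old nn.Linear(vocab_size, hidden_size) checkpoints
        self.weight = self.create_parameter(shape=[out_features, in_features], is_bias=False)
        self.bias = self.create_parameter(shape=[out_features], is_bias=True)

    def forward(self, x):
        # [..., in_features] -> [..., out_features]
        return paddle.matmul(x, self.weight, transpose_y=True) + self.bias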
10 changes: 5 additions & 5 deletions paddlenlp/transformers/bert/modeling.py
@@ -30,6 +30,7 @@

 from paddlenlp.transformers.model_utils import PretrainedModel, register_base_model
 
+from ...layers import Linear as TransposedLinear
 from ...utils.converter import StateDictNameMapping
 from ...utils.env import CONFIG_NAME
 from ..model_outputs import (
@@ -863,10 +864,9 @@ def __init__(self, config: BertConfig):
         self.transform = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = getattr(nn.functional, config.hidden_act)
         self.layer_norm = nn.LayerNorm(config.hidden_size)
-        self.decoder = nn.Linear(config.vocab_size, config.hidden_size)
-        self.decoder_bias = self.create_parameter(
-            shape=[config.vocab_size], dtype=self.decoder.weight.dtype, is_bias=True
-        )
+        self.decoder = TransposedLinear(config.hidden_size, config.vocab_size)
+        # link bias to load pretrained weights
+        self.decoder_bias = self.decoder.bias
 
     def forward(self, hidden_states, masked_positions=None):
         if masked_positions is not None:
@@ -876,7 +876,7 @@ def forward(self, hidden_states, masked_positions=None):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.activation(hidden_states)
         hidden_states = self.layer_norm(hidden_states)
-        hidden_states = paddle.tensor.matmul(hidden_states, self.decoder.weight, transpose_y=True) + self.decoder_bias
+        hidden_states = self.decoder(hidden_states)
         return hidden_states


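The change to each masked-LM head follows the same pattern: the hand-written paddle.tensor.matmul(..., transpose_y=True) + bias projection moves into the layer's own forward, and decoder_bias becomes an alias for self.decoder.bias so existing code and checkpoints that reference the old attribute still resolve. A quick numerical sanity check, assuming the TransposedLinear sketch above (shapes chosen for illustration only):

# --- Illustrative check, not part of this commit ---
import paddle

hidden_size, vocab_size = 768, 30522  # example sizes only
decoder = TransposedLinear(hidden_size, vocab_size)
hidden_states = paddle.randn([2, 5, hidden_size])

# old formulation: explicit matmul against the transposed weight, plus bias
old = paddle.tensor.matmul(hidden_states, decoder.weight, transpose_y=True) + decoder.bias
# new formulation: let the layer do the projection
new = decoder(hidden_states)

print(paddle.allclose(old, new))  # True (0-d boolean Tensor)
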
11 changes: 6 additions & 5 deletions paddlenlp/transformers/ernie/modeling.py
@@ -21,6 +21,7 @@
 from paddle import Tensor
 from paddle.fluid.dygraph.base import in_declarative_mode
 
+from ...layers import Linear as TransposedLinear
 from ...utils.env import CONFIG_NAME
 from .. import PretrainedModel, register_base_model
 from ..model_outputs import (
@@ -745,10 +746,9 @@ def __init__(
         self.transform = nn.Linear(config.hidden_size, config.hidden_size, weight_attr=weight_attr)
         self.activation = getattr(nn.functional, config.hidden_act)
         self.layer_norm = nn.LayerNorm(config.hidden_size)
-        self.decoder = nn.Linear(config.vocab_size, config.hidden_size)
-        self.decoder_bias = self.create_parameter(
-            [config.vocab_size], is_bias=True, default_initializer=nn.initializer.Constant(value=0)
-        )
+        self.decoder = TransposedLinear(config.hidden_size, config.vocab_size)
+        # link bias to load pretrained weights
+        self.decoder_bias = self.decoder.bias
 
     def forward(self, hidden_states, masked_positions=None):
         if masked_positions is not None:
@@ -758,7 +758,8 @@ def forward(self, hidden_states, masked_positions=None):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.activation(hidden_states)
         hidden_states = self.layer_norm(hidden_states)
-        hidden_states = paddle.tensor.matmul(hidden_states, self.decoder.weight, transpose_y=True) + self.decoder_bias
+        hidden_states = self.decoder(hidden_states)
+        # hidden_states = paddle.tensor.matmul(hidden_states, self.decoder.weight, transpose_y=True) + self.decoder_bias
         return hidden_states


11 changes: 5 additions & 6 deletions paddlenlp/transformers/roberta/modeling.py
@@ -22,6 +22,7 @@
 import paddle.nn.functional as F
 from paddle import Tensor
 
+from ...layers import Linear as TransposedLinear
 from ...utils.converter import StateDictNameMapping
 from .. import PretrainedModel, register_base_model
 from ..model_outputs import (
@@ -1228,19 +1229,17 @@ def __init__(self, config: RobertaConfig):
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.layer_norm = nn.LayerNorm(config.hidden_size, epsilon=config.layer_norm_eps)
 
-        tensor = paddle.zeros((config.vocab_size,))
-        self.bias = paddle.create_parameter(
-            shape=tensor.shape, dtype=tensor.dtype, default_initializer=nn.initializer.Assign(tensor)
-        )
-        self.decoder = nn.Linear(config.vocab_size, config.hidden_size)
+        self.decoder = TransposedLinear(config.hidden_size, config.vocab_size)
+        # link bias to load pretrained weights
+        self.bias = self.decoder.bias
 
     def forward(self, features, **kwargs):
         x = self.dense(features)
         x = F.gelu(x)
         x = self.layer_norm(x)
 
         # project back to size of vocabulary with bias
-        x = paddle.matmul(x, self.decoder.weight, transpose_y=True) + self.bias
+        x = self.decoder(x)
 
         return x

