-
Notifications
You must be signed in to change notification settings - Fork 2
/
Decoder.py
57 lines (50 loc) · 2.15 KB
/
Decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import Attention as Attn
import torch
import torch.nn as nn
import torch.nn.functional as F
class LuongAttnDecoderRNN(nn.Module):
    """Single-step GRU decoder with Luong-style attention.

    Each call to ``forward`` handles one decoding step: it embeds the input,
    advances a *bidirectional* GRU, attends over ``encoder_outputs`` with the
    GRU output, and returns a softmax distribution of width ``output_size``
    together with the new GRU hidden state.
    """

    def __init__(self, attn_model, embedding, hidden_size,
                 output_size, n_layers=1):
        """Build the decoder layers.

        Args:
            attn_model: Attention variant; forwarded unchanged to
                ``Attn.Attn`` together with ``hidden_size``.
            embedding: Module applied to ``input_step`` in ``forward``. Its
                output feeds the GRU, so it must produce ``hidden_size``
                features (presumably an ``nn.Embedding`` -- confirm against
                the caller).
            hidden_size: Input and hidden width of the GRU.
            output_size: Width of the final projection.
            n_layers: Number of stacked GRU layers.
        """
        super().__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        # Define layers
        self.embedding = embedding
        # NOTE: the GRU is bidirectional, so its output has
        # 2 * hidden_size features and its hidden state has
        # n_layers * 2 leading entries.
        self.gru = nn.GRU(hidden_size,
                          hidden_size,
                          n_layers,
                          bidirectional=True)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.attn = Attn.Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input_step: Current input, passed to ``self.embedding``.
                Assumed to be shaped ``(1, batch)`` so that the GRU sees a
                sequence of length one -- TODO confirm against the caller.
            last_hidden: Initial hidden state for the GRU. Because the GRU
                is bidirectional, PyTorch expects
                ``(n_layers * 2, batch, hidden_size)``.
            encoder_outputs: Encoder states to attend over. Assumed to be
                ``(src_len, batch, hidden_size)``; it is transposed to
                batch-first before the batched matrix product.

        Returns:
            tuple: ``(output, hidden)`` where ``output`` is the softmax
            (over dim 1) of the final projection and ``hidden`` is the
            hidden state returned by the GRU.
        """
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step)

        # Forward through the bidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Sum the forward and backward halves of the GRU output so the
        # feature width is hidden_size again.
        rnn_output = (rnn_output[:, :, :self.hidden_size]
                      + rnn_output[:, :, self.hidden_size:])

        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)

        # Multiply attention weights to encoder outputs
        # to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))

        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))

        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)

        # Return output and final hidden state
        return output, hidden