-
Notifications
You must be signed in to change notification settings - Fork 3
/
gen_sentiment_files_dictionary.lua
84 lines (65 loc) · 2.33 KB
/
gen_sentiment_files_dictionary.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
require 'mobdebug'.start()
require 'nn'
require 'nngraph'
require 'optim'
require 'Embedding'
local model_utils=require 'model_utils'
require 'table_utils'
-- Switch nngraph's debug flag on before the model is loaded and run below.
nngraph.setDebug(true)
--- Read a text file into a list of token lists.
-- Every line of the file becomes one table holding the pieces returned by
-- string.split(line, " "), kept in their original order; the per-line tables
-- are returned in file order.
-- NOTE: string.split is not part of the Lua standard library; it is presumably
-- installed by one of the packages required at the top of this file.
-- @param fn path of the file to read (io.lines raises an error if it cannot be opened)
-- @return sequence of sentences, each a sequence of word strings
function read_words(fn)
  -- All working variables are local so that a call neither leaks nor
  -- overwrites globals named fd, line, sentence or sentences.
  local sentences = {}
  for line in io.lines(fn) do
    local sentence = {}
    -- ipairs guarantees the tokens are copied in positional order.
    for _, word in ipairs(string.split(line, " ")) do
      sentence[#sentence + 1] = word
    end
    sentences[#sentences + 1] = sentence
  end
  return sentences
end
-- Load the two vocabulary tables saved next to this script.
inv_vocabulary_en = table.load('inv_vocabulary_en')
vocabulary_en = table.load('vocabulary_en')

-- indexes[i] = i for every entry of vocabulary_en, i.e. one index per word.
local vocabulary_size = #vocabulary_en
indexes = torch.Tensor(vocabulary_size)
for position = 1, indexes:size(1) do
  indexes[position] = position
end

-- Run the saved model once over the whole index range. Both inputs are
-- independent copies of the same index tensor.
m = torch.load('model')
word_center = indexes:clone()
word_outer = indexes:clone()
local model_outputs = m:forward({word_center, word_outer})

-- The first output is discarded; the second and third are summed element-wise
-- to give the per-word vectors used below.
-- NOTE(review): presumably these are the outer and center embeddings - confirm
-- against the model definition.
_, x_outer, x_center = unpack(model_outputs)
word_vectors = x_outer + x_center
-- Build one feature vector and one sentiment label per usable phrase.
--
-- Each line of 'dictionary' is a phrase whose last token is a numeric index
-- into the lines of 'sentiment_labels'; the last token of that label line is
-- the numeric sentiment value. Words missing from inv_vocabulary_en are
-- dropped, and phrases with no known word are skipped altogether.
-- NOTE(review): the index is used as a 1-based line number of
-- 'sentiment_labels' - confirm the input files are numbered that way.
phrases = read_words('dictionary')
sentiment_lables = read_words('sentiment_labels')
assert(#phrases == #sentiment_lables)
phrases_filtered = {}
phrases_filtered_text = {}
sentiment_lables_filtered = {}
for _, sentence in ipairs(phrases) do
  -- Vocabulary entries of the known words, in phrase order. The last token
  -- is the label index, not a word, so it is excluded.
  local short_sentence = {}
  for i = 1, #sentence - 1 do
    local word_id = inv_vocabulary_en[sentence[i]]
    if word_id ~= nil then
      short_sentence[#short_sentence + 1] = word_id
    end
  end
  if #short_sentence > 0 then
    -- Stack the word vectors row by row, then average over the rows.
    local t = torch.Tensor(#short_sentence, word_vectors:size(2))
    for i, word_id in ipairs(short_sentence) do
      t[{{i}, {}}] = word_vectors[{{word_id}, {}}]
    end
    phrases_filtered[#phrases_filtered + 1] = t:mean(1)
    phrases_filtered_text[#phrases_filtered_text + 1] = short_sentence

    -- Look the label up; fail with a readable message instead of an opaque
    -- "attempt to get length of a nil value" when the index is unusable.
    local label_index = tonumber(sentence[#sentence])
    local sentiment_labels_sentence = label_index and sentiment_lables[label_index]
    assert(sentiment_labels_sentence,
      'no sentiment label line for phrase index ' .. tostring(sentence[#sentence]))
    local label = tonumber(sentiment_labels_sentence[#sentiment_labels_sentence])
    -- Appending nil would silently skip the entry and desynchronise the lists.
    assert(label, 'non-numeric sentiment label for phrase index ' .. tostring(label_index))
    sentiment_lables_filtered[#sentiment_lables_filtered + 1] = label
  end
end
-- Pack the per-phrase features and labels into two tensors with one row per
-- phrase, then save them together with the filtered phrases.
assert(#sentiment_lables_filtered == #phrases_filtered)

local phrase_count = #phrases_filtered
local feature_size = word_vectors:size(2)
phrases_filtered_tensor = torch.Tensor(phrase_count, feature_size)
sentiment_lables_filtered_tensor = torch.Tensor(phrase_count, 1)

for row = 1, phrase_count do
  phrases_filtered_tensor[{{row}, {}}] = phrases_filtered[row]
  sentiment_lables_filtered_tensor[{{row}, {}}] = sentiment_lables_filtered[row]
end

-- Saved layout: {features, labels, per-phrase lists of vocabulary entries}.
local saved_payload = {
  phrases_filtered_tensor,
  sentiment_lables_filtered_tensor,
  phrases_filtered_text,
}
torch.save('sentiment_features_and_labels', saved_payload)