-
Notifications
You must be signed in to change notification settings - Fork 0
/
produce_full_results.py
62 lines (42 loc) · 1.42 KB
/
produce_full_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from keras.models import load_model
from keras.models import Model
import numpy as np
import pickle
# Sizes used by the batch generator's shape check further down.
nb_timesteps = 186  # 95% percentile of description length
word_embedding_dims = 300  # length of one token embedding vector
nb_industries = 107  # original author's estimate ("100 something"); TODO confirm
batch_size = 1

# NOTE(review): pickle.load can execute arbitrary code; only point these at
# files produced by this project's own pipeline.
with open("data.pkl", "rb") as infile:
    # Same data file that was used for training.
    data = pickle.load(infile)

with open("found_tokens.pkl", "rb") as infile:
    # Dictionary mapping each known token to its embedding.
    all_tokens = pickle.load(infile)
def input_vectors(source):
    """
    Endlessly yield batches of feature matrices built from ``source``.

    Descriptions are consumed in order and the cursor wraps around to the
    start once the end of ``source`` is reached, so this generator never
    terminates on its own; the consumer decides how many batches to draw.
    Only features are produced (no labels).

    Args:
        source: Indexable, non-empty sequence of descriptions. Each
            description is handed to ``generate_vector``.

    Yields:
        A NumPy array of shape
        ``(batch_size, nb_timesteps, word_embedding_dims)``.

    Raises:
        AssertionError: If a batch does not have the expected shape, e.g.
            because a description does not contain exactly
            ``nb_timesteps`` tokens.
        ZeroDivisionError: If ``source`` is empty.
    """
    count = 0
    while True:
        X = []
        for _ in range(batch_size):
            desc = source[count % len(source)]
            X.append(generate_vector(desc))
            # Move the cursor forward so the next sample is the next
            # description rather than the same one again.
            count += 1
        X = np.array(X)
        assert X.shape == (batch_size, nb_timesteps, word_embedding_dims)
        yield X
def generate_vector(desc):
    """
    Build the feature matrix for one description.

    Each token is looked up in the module-level ``all_tokens`` mapping;
    tokens that are not present are represented by an all-zero vector of
    length ``word_embedding_dims``.

    Args:
        desc: Iterable of tokens.

    Returns:
        A NumPy array with one row per token of ``desc``, in order.
    """
    # One fallback vector is enough: np.array() below copies the rows, so
    # sharing it between unknown tokens cannot alias rows in the result.
    unknown = np.zeros(word_embedding_dims)
    return np.array([all_tokens.get(token, unknown) for token in desc])
# Load the saved network and wrap it so that prediction returns the output
# of the layer at index 1 instead of the network's final output.
model = load_model('my_model.h5')
target_layer = model.get_layer(index=1)
intermediate_layer_model = Model(
    inputs=model.input,
    outputs=target_layer.output,
)

# One generator batch per step; a trailing partial batch is not requested.
nb_steps = len(data) // batch_size
intermediate_output = intermediate_layer_model.predict_generator(
    input_vectors(data),
    steps=nb_steps,
    verbose=1,
)

# Persist the collected activations for later use.
with open("vec_rep.pkl", "wb") as outfile:
    pickle.dump(intermediate_output, outfile)