-
Notifications
You must be signed in to change notification settings - Fork 1
/
predict_text_meaning.py
96 lines (78 loc) · 2.57 KB
/
predict_text_meaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
#By abin and hari
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
import os
from keras.models import model_from_json
import nltk
# --- Resource locations (all paths are relative to the working directory) ---

# Word-embedding vectors (alternative noted by the authors: glove.6B.300d.txt).
EMBEDDING_FILE = 'glove/wiki.ml.vec'
# Tab-separated training queries.
TRAIN_FILE = 'data/querydata(m)f.tsv'
# JSON word map, read by load_map().
VOC_FILE = 'word_map_ml.json'
# Serialized model architecture (JSON) and its weights (HDF5), read by load_model().
MODEL_FILE = 'model_ml.json'
MODEL_WEIGHT = 'model_ml.h5'
# Image file name for the model diagram (not referenced in this part of the file).
MODEL_PIC = 'model_ml.png'
# Tab-separated table read by load_data() and queried by predict().
SUGGEST_DATA = 'data/querydata_to_be_suggested(m)f.tsv'

# NOTE(review): keras is already imported above this line; the backend is
# normally chosen when keras is first imported, so this assignment may come
# too late to take effect -- confirm and, if needed, move it above the imports.
os.environ['KERAS_BACKEND'] = 'theano'
def load_map(path=None):
    """Load the word map from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file. When omitted (``None``), the
            module-level ``VOC_FILE`` is used, which is the original behavior.

    Returns:
        The object decoded from the JSON file. ``predict`` uses it as a
        mapping from token to integer id.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import json

    # Resolve the default at call time so a later change to VOC_FILE is honored.
    source = VOC_FILE if path is None else path
    with open(source, 'r', encoding='utf-8') as fp:
        return json.load(fp)
def load_model(model_file=None, weight_file=None):
    """Rebuild the model from its JSON description and load its weights.

    Args:
        model_file: Path of the JSON architecture file. Defaults to the
            module-level ``MODEL_FILE`` when ``None``.
        weight_file: Path of the weights file. Defaults to the module-level
            ``MODEL_WEIGHT`` when ``None``.

    Returns:
        The model produced by ``model_from_json`` with weights loaded.

    Raises:
        OSError: If the architecture file cannot be opened.
    """
    model_file = MODEL_FILE if model_file is None else model_file
    weight_file = MODEL_WEIGHT if weight_file is None else weight_file

    # The context manager closes the handle even if read() raises; the
    # original open()/close() pair leaked the descriptor in that case.
    with open(model_file, 'r', encoding='utf-8') as json_file:
        loaded_model_json = json_file.read()

    model = model_from_json(loaded_model_json)
    # Load weights into the freshly built model.
    model.load_weights(weight_file)
    print("Loaded model from disk")
    return model
def load_data(path=None):
    """Read the suggestion table from a tab-separated, UTF-8 encoded file.

    Args:
        path: Location of the TSV file. When omitted (``None``), the
            module-level ``SUGGEST_DATA`` is used, which is the original
            behavior.

    Returns:
        A ``pandas.DataFrame`` with the file's contents. ``predict`` reads
        its ``sentiment`` and ``question`` columns.
    """
    source = SUGGEST_DATA if path is None else path
    return pd.read_csv(source, sep='\t', encoding='utf-8')
def _first_question(data_train, label):
    """Return the first 'question' whose 'sentiment' equals *label*, or ""."""
    questions = data_train.loc[data_train['sentiment'] == label]['question'].values.tolist()
    # An empty selection used to raise IndexError; "" is the same
    # "no suggestion" value predict() already returns for empty input.
    return questions[0] if questions else ""


def predict(text, model, word_map, data_train, threshold=0.85, max_length=50):
    """Classify a query and return the suggested question for its class.

    Only ``text[0]`` is classified. It is tokenized with
    ``nltk.word_tokenize``, tokens are mapped to ids through ``word_map``,
    the id sequence is post-padded to ``max_length`` and passed to
    ``model.predict``. The highest-scoring class is accepted only if its
    score exceeds ``threshold``; otherwise class 0 is used.

    Args:
        text: Sequence whose first element is the query string.
        model: Object whose ``predict`` method returns one score row per
            input sequence (presumably the Keras model from ``load_model``).
        word_map: Mapping from token to integer id.
        data_train: DataFrame with ``sentiment`` and ``question`` columns.
        threshold: Minimum score for the top class to be accepted.
        max_length: Length the id sequence is padded to.

    Returns:
        The first ``question`` of the chosen class, or ``""`` when ``text``
        is empty, its first element is empty, or no row has the chosen class.
    """
    # Guard both an empty container and an empty first element; the original
    # indexed text[0] unconditionally and crashed on [].
    if not text or not text[0]:
        return ""

    print(text)
    tokens = nltk.word_tokenize(text[0])
    # Out-of-vocabulary tokens are dropped rather than mapped to a placeholder id.
    text_seq = [[word_map[w] for w in tokens if w in word_map]]
    padded_docs = pad_sequences(text_seq, maxlen=max_length, padding='post')

    pred = model.predict(padded_docs)
    print(pred)
    scores = pred.tolist()
    # Index of the first maximum score in each row.
    out = [row.index(max(row)) for row in scores]
    print(out)

    label = out[0]
    if scores[0][label] > threshold:
        print(scores[0][label])
    else:
        # Not confident enough: fall back to class 0.
        print(scores[0][0])
        label = 0
    return _first_question(data_train, label)
# l=predict(["ഹോട്ടലുകളുടെ "])
# print(l)
# print(predict(['ഹോട്ടലുകളുടെ പട്ടിക തരുക']))