forked from iamadamhair/ispy_python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
questions.py
271 lines (198 loc) · 8.05 KB
/
questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import math
import time
import logging as log
import numpy as np
import tags
import database as db
# Module-level lists, both initially empty. None of the functions shown in
# this file read or write them.
# NOTE(review): presumably caches for question/description data filled in
# elsewhere -- confirm they are still used before relying on them.
_questions = []
_descriptions = []
def ask(question_id, object, game, answer_data, answers, pO, Pi, p_tags, objects):
    """
    Ask a question and update every object's probability from the answer.

    The answer is not typed in by a user: it is looked up in answer_data for
    the object being played. Each object's probability is multiplied by the
    average of the available evidence terms, then the probabilities are
    normalized so they sum to 1.

    Args:
        question_id: 1-based id of the tag being asked about.
        object: the object in play; only its 1-based ``id`` attribute is read.
        game: not used by this function.
        answer_data: table indexed as [object.id - 1][question_id - 1]. The
            answer counts as "yes" only when the entry equals 'yes'.
        answers: list of booleans; exactly one entry is appended per call.
        pO: array of object probabilities. Entries are multiplied in place
            before the normalized copy is built, so it must support
            element-wise division (e.g. a numpy array).
        Pi: table indexed as [object index][question_id - 1] holding an extra
            probability of a "yes" for that object/tag pair; -1 means no
            value is available.
        p_tags: not used by this function.
        objects: table indexed as [object index][question_id] whose entries
            are pairs read here as (N, D) -- the yes count and the total
            count for that object/tag pair (get_best calls them num_yes and
            length).

    Returns:
        (pO, answers): the normalized probabilities and the same answers list.

    Side effects:
        Appends the question tag, the answer and the new probabilities to
        "example.txt" in the current working directory.
    """
    # Takes best question and updates all object probabilities based on the answer
    probabilityD = get_tval()
    question_tag = tags.get(question_id)
    answer = answer_data[object.id - 1][question_id - 1]
    answered_yes = bool(answer == 'yes')

    # One question yields one answer, so record it once rather than once per
    # object inside the update loop.
    answers.append(answered_yes)

    for objectID in range(len(pO)):
        T = get_t(objectID + 1, question_id)
        N = objects[objectID][question_id][0]
        D = objects[objectID][question_id][1]
        pi_value = Pi[objectID][question_id - 1]

        # K sums the description-based term and the add-one smoothed share of
        # past answers that agree with the answer just given.
        if answered_yes:
            K = probabilityD[T] + (N + 1) / (D + 2.0)
            pi_term = pi_value
        else:
            K = (1 - probabilityD[T]) + (D - N + 1) / (D + 2.0)
            pi_term = 1 - pi_value

        # The -1 sentinel is checked for this object (the same way get_best
        # checks it) so a missing value is never averaged in as a probability.
        if pi_value == -1:
            multiplier = K / 2
        else:
            multiplier = (K + pi_term) / 3

        pO[objectID] = pO[objectID] * multiplier

    # Normalize the probabilities so that all object probabilities will sum to 1
    pO = pO / np.sum(pO)

    # Save the question, its answer and the updated probabilities
    with open("example.txt", "a") as myfile:
        myfile.write(question_tag + " -> " + answer + " \n")
        myfile.write(str(pO) + "\n")

    return pO, answers
def get_p_tags():
    """
    Count how many times each question has received each answer value.

    The counts come from the answers table grouped by question id and answer
    value only; they are not broken down per object.

    Returns:
        A list indexed by ``qid - 1``. Each entry is a dict that starts as
        ``{0: 0, 1: 0}`` and maps an answer value to the number of rows with
        that answer. Questions with no rows keep zero counts.
        NOTE(review): assumes qid is a positive integer and answer is stored
        as 0/1 -- confirm against the table definition.
    """
    p_tags = []
    db.cursor.execute('SELECT qid, answer, COUNT(*) FROM answers GROUP BY qid, answer')
    for qid, answer, count in db.cursor.fetchall():
        # Grow the list up to this qid so gaps in the ids, or rows that are
        # not returned in qid order, cannot cause an IndexError.
        while len(p_tags) < qid:
            p_tags.append({0: 0, 1: 0})
        p_tags[qid - 1][answer] = count
    return p_tags
def get_best(game, objects, asked_questions, pO, Pi, p_tags, start):
    """
    Finds the question that best splits our current subset of objects

    For every question that has not been asked yet, the probabilities the
    considered objects would have after a "yes" and after a "no" are
    computed and normalized, and the question with the lowest resulting
    weighted entropy is chosen.

    Args:
        game: not used by this function.
        objects: table indexed as [object index][question id] whose entries
            are pairs (num_yes, length).
        asked_questions: collection of question ids that must not be re-asked.
        pO: array of current object probabilities.
        Pi: table indexed as [object index][question id - 1] holding an extra
            probability of a "yes"; -1 means no value is available.
        p_tags: not used by this function.
        start: position in the ascending-probability ordering from which
            objects are considered.

    Returns:
        The id (1-289) of the best question, or 0 if no question qualified.
    """
    num_objects = len(pO)
    num_tags = 289  # question ids run from 1 to 289

    tvals = get_tval()
    best_entropy = float('inf')
    bestD = 0

    # We only consider objects beyond the start index when deciding
    # Objects below the index are still updated when the question is asked and
    # can shift back into play, but decisions are not made based on them while
    # they're below start
    # (+1 turns the 0-based argsort positions into 1-based object ids.)
    objects_considered = np.argsort(pO)[start:] + 1

    # Look over all tags
    for j in range(1, num_tags + 1):
        # Don't reask questions
        if j in asked_questions:
            continue

        # Fresh arrays per question; objects outside the subset stay at 0.
        probabilities_yes = np.zeros(num_objects)
        probabilities_no = np.zeros(num_objects)

        # Only look at objects in the correct subset
        for i in objects_considered:
            T = get_t(i, j)
            num_yes = objects[i-1][j][0]
            length = objects[i-1][j][1]
            pi_value = Pi[i-1][j-1]

            yes_evidence = tvals[T] + (num_yes + 1.0) / (length + 2.0)
            no_evidence = (1 - tvals[T]) + (length - num_yes + 1.0) / (length + 2.0)

            if pi_value == -1:
                probabilities_yes[i-1] = pO[i-1] * yes_evidence / 2
                probabilities_no[i-1] = pO[i-1] * no_evidence / 2
            else:
                probabilities_yes[i-1] = pO[i-1] * (yes_evidence + pi_value) / 3
                probabilities_no[i-1] = pO[i-1] * (no_evidence + 1 - pi_value) / 3

        # Normalize the probabilities
        probabilities_yes = probabilities_yes / sum(probabilities_yes)
        probabilities_no = probabilities_no / sum(probabilities_no)

        # Find out which tag lowers total entropy the most (AKA it gives us
        # the most knowledge)
        p_for_yes = 0
        p_for_no = 0
        pi_given_yes_times_log = 0
        pi_given_no_times_log = 0
        for i in objects_considered:
            num_yes = objects[i-1][j][0]
            length = objects[i-1][j][1]

            p_for_yes += pO[i-1] * num_yes / length
            p_for_no += pO[i-1] * (length - num_yes) / length

            # 0 * log(0) is taken as 0; math.log(0) would raise ValueError.
            p_yes = probabilities_yes[i-1]
            if p_yes != 0:
                pi_given_yes_times_log += p_yes * math.log(p_yes, 2)
            p_no = probabilities_no[i-1]
            if p_no != 0:
                pi_given_no_times_log += p_no * math.log(p_no, 2)

        entropy = -p_for_yes * pi_given_yes_times_log - p_for_no * pi_given_no_times_log
        if entropy < best_entropy:
            bestD = j
            best_entropy = entropy

    return bestD
def copy_into_answers():
    """
    QuestionAnswers holds just the answer set data
    Copies the pure data into a table that will be appended to throughout gameplay

    Every (tag, answer, object) row of QuestionAnswers is inserted into
    answers as (qid, oid, answer), where qid is the id of the tag in the
    Tags table. All inserts are committed together at the end.

    Raises:
        TypeError: if a tag in QuestionAnswers has no row in Tags (the id
            lookup returns no row).
    """
    log.info('Copying into answers')
    db.cursor.execute('SELECT tag, answer, object from QuestionAnswers')
    results = db.cursor.fetchall()

    # The same tag appears on many rows; look each tag id up only once.
    tag_ids = {}
    for tag, answer, object_id in results:
        if tag not in tag_ids:
            db.cursor.execute('SELECT id from Tags where tag = %s', (tag,))
            tag_ids[tag] = db.cursor.fetchone()[0]
        db.cursor.execute('INSERT INTO answers (qid, oid, answer) VALUES (%s, %s, %s)', (tag_ids[tag], object_id, answer))

    db.connection.commit()
def build_pqd():
    """
    Pqd is the probability that the answer will be yes to a keyword asked about an object where the keyword shows up X number of times in the descriptions
    Summed over all objects where a keyword shows up X number of times

    For objects 1-17 and the first 289 tags, the yes answers and the total
    answers are accumulated per T value (the number of the object's
    descriptions that contain the tag). One row per T value (0-6) is then
    inserted into the Pqd table and the yes total is printed.

    Raises:
        IndexError: if a tag matches more than 6 descriptions of one object,
            or if fewer than 289 tags are available.
    """
    log.info('Building Pqd')
    probabilityD = [0, 0, 0, 0, 0, 0, 0]
    denominator = [0, 0, 0, 0, 0, 0, 0]
    all_tags = tags.get_all()

    for objectID in range(1, 18):
        log.info(" Object %d", objectID)
        for tag in range(0, 289):
            tag_text = all_tags[tag]

            # Queries are parameterized so tags containing quotes cannot
            # break the statement.
            db.cursor.execute('SELECT COUNT(*) FROM Descriptions WHERE description LIKE %s AND objectID = %s', ('%{0}%'.format(tag_text), objectID))
            T = db.cursor.fetchone()[0]
            # T is based on a tag and an object description. T is how many times a tag is used in an object's description. It can be 0-6

            db.cursor.execute('SELECT COUNT(*) FROM QuestionAnswers WHERE tag = %s AND object = %s AND answer = TRUE', (tag_text, objectID))
            count = db.cursor.fetchone()[0]
            # count is the number of times someone answered yes to a tag/object pair

            db.cursor.execute('SELECT COUNT(*) FROM QuestionAnswers WHERE tag = %s AND object = %s', (tag_text, objectID))
            D = db.cursor.fetchone()[0]
            # D is the total number of times a tag/object pair has been asked (yesses and nos)

            # For the T value based on the specific tag/object pair, update the totals shared by all tag/object pairs with the same T value
            probabilityD[T] = probabilityD[T] + count
            denominator[T] = denominator[T] + D

    for freq in range(0, 7):
        # This puts the sum of the yes answers and the total answers into the row that corresponds with the T value
        db.cursor.execute('INSERT INTO Pqd (t_value, yes_answers, total_answers) VALUES (%s, %s, %s)', (freq, probabilityD[freq], denominator[freq]))
        print(probabilityD[freq])
    db.connection.commit()
def get_subset_split(pO):
    """
    When probabilities ordered least to greatest, returns index of largest difference between probabilities
    System asks questions to try to split subset in half each time, so the split should move closer to the max probability each time

    Args:
        pO: sequence or array of object probabilities.

    Returns:
        The position x, in ascending sorted order, of the largest gap
        ``sorted[x + 1] - sorted[x]``. The first such position wins on ties.
        Returns 0 when there is no positive gap or fewer than two values.
    """
    # Differences between neighbouring probabilities in ascending order.
    gaps = np.diff(np.sort(pO))

    best_index = 0
    best_gap = 0
    for index, gap in enumerate(gaps):
        # Strict comparison keeps the earliest of several equal gaps.
        if gap > best_gap:
            best_gap = gap
            best_index = index
    return best_index
def get_tval():
    """
    Returns the proportion of yes answers stored in Pqd as a list of floats

    There is one entry per row of the Pqd table, in the order the database
    returns the rows; callers index the list by T value.
    NOTE(review): float() raises TypeError if the database yields NULL for a
    row (e.g. a row whose total_answers is 0) -- confirm this cannot happen.
    """
    db.cursor.execute('SELECT yes_answers/total_answers FROM Pqd')
    return [float(row[0]) for row in db.cursor.fetchall()]
def get_t(object_id, question_id):
    """
    Returns the number of an object's descriptions that contain a specific tag

    The tag text for question_id is looked up through the tags module and
    matched anywhere inside the description with a SQL LIKE pattern.
    """
    pattern = '%{0}%'.format(tags.get(question_id))
    query = ('SELECT COUNT(*) '
             'FROM Descriptions '
             'WHERE description LIKE %s '
             'AND objectID = %s')
    db.cursor.execute(query, (pattern, str(object_id)))
    row = db.cursor.fetchone()
    return row[0]