-
Notifications
You must be signed in to change notification settings - Fork 1
/
ranking.py
94 lines (85 loc) · 2.23 KB
/
ranking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from framework import *
from grammar import *
from substitution import *
from clustering import *
from myGlobals import Globals
import numpy as np
# Magnitude of the starting "worst" score in GetBestOutput: a cluster is only
# selected there if its score is strictly greater than -INFI.
INFI = 10**5
def getFeatures(cluster, input):
    """Build the feature vector describing how `input` relates to `cluster`.

    The cluster's existing inputs plus `input` are passed to InferTreeExp to
    obtain a candidate input expression, the Globals.literals and
    Globals.variables counters are zeroed, and the cluster's current program
    (cluster.inputLGG -> cluster.outputLGG) is run on `input`.

    Returns a list of seven values, in this order:
        0. number of inputs already in the cluster
        1. len(Var(...)) of the cluster's current input expression
        2. len(Var(...)) of the candidate input expression
        3. len(Iter(...)) of the cluster's current input expression
        4. len(Iter(...)) of the candidate input expression
        5. Globals.literals after the run
        6. Globals.variables after the run

    Any exception raised by the helpers propagates to the caller.
    """
    candidateLGG = InferTreeExp(frozenset(), cluster.inputList + [input])

    # Zero the counters before running so the values read at the end belong
    # to this call only (presumably RunProgram updates them -- TODO confirm).
    Globals.literals = 0
    Globals.variables = 0
    RunProgram(Program(cluster.inputLGG, cluster.outputLGG), input)

    # Computed in the same order as the returned list; the Globals counters
    # are deliberately read last.
    clusterSize = len(cluster.inputList)
    currentVarCount = len(Var(cluster.inputLGG))
    candidateVarCount = len(Var(candidateLGG))
    currentIterCount = len(Iter(cluster.inputLGG))
    candidateIterCount = len(Iter(candidateLGG))
    literalCount = Globals.literals
    variableCount = Globals.variables

    return [
        clusterSize,
        currentVarCount,
        candidateVarCount,
        currentIterCount,
        candidateIterCount,
        literalCount,
        variableCount,
    ]
def CreateDBData(clusters, input, output):
    """Collect (input item, cluster index) pairs usable as database rows.

    Every input item is tried against every cluster. A pair is skipped when
    getFeatures raises for it; otherwise the cluster is executed on the item,
    the prediction and the expected output are rendered with toXML(), and
    the pair (input[i], j) is recorded.

    Args:
        clusters: sequence of clusters accepted by getFeatures / execute.
        input: sequence of input items.
        output: sequence indexed in parallel with `input`; each element must
            provide toXML().

    Returns:
        A list of (input item, cluster index) tuples, ordered by input and
        then by cluster.

    Raises:
        Whatever execute() or toXML() raise; only feature-extraction errors
        are suppressed.
    """
    data = []
    for i, item in enumerate(input):
        for j, cluster in enumerate(clusters):
            try:
                getFeatures(cluster, item)
            except Exception:
                # Feature extraction failed for this pairing: leave it out.
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
            # The prediction and XML renderings are not used below. The calls
            # are kept so that any error or side effect they produce is
            # unchanged. NOTE(review): likely removable -- confirm execute()
            # and toXML() have no side effects callers depend on.
            execute(cluster, item).toXML()
            output[i].toXML()
            data.append((item, j))
    return data
def CreateIdealMatchings(clusters, input, output):
    """Build labelled training data from every workable (input, cluster) pair.

    For each pair on which getFeatures succeeds, the cluster is executed on
    the input item and the toXML() rendering of the prediction is compared
    with the toXML() rendering of the expected output.

    Args:
        clusters: sequence of clusters accepted by getFeatures / execute.
        input: sequence of input items.
        output: sequence indexed in parallel with `input`; each element must
            provide toXML().

    Returns:
        A tuple (features, labels) of two parallel lists: the feature vector
        of each pair, and 1 if the prediction's XML equals the expected
        output's XML, else 0. This is the shape LearnWeights reads as
        data[0] and data[1].

    Raises:
        Whatever execute() or toXML() raise; only feature-extraction errors
        are suppressed.
    """
    featureRows = []
    labels = []
    for i, item in enumerate(input):
        for cluster in clusters:
            try:
                features = getFeatures(cluster, item)
            except Exception:
                # Feature extraction failed for this pairing: leave it out.
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
            predictionXML = execute(cluster, item).toXML()
            outputXML = output[i].toXML()
            # Append only after both renderings succeeded so the two lists
            # always stay the same length.
            featureRows.append(features)
            labels.append(1 if predictionXML == outputXML else 0)
    return (featureRows, labels)
def MatchScore(cluster, input, classifier):
    """Score how well `cluster` fits `input` according to `classifier`.

    Args:
        cluster: cluster accepted by getFeatures.
        input: the input item to score against the cluster.
        classifier: object exposing decision_function(list_of_feature_rows).

    Returns:
        The first element of classifier.decision_function([features]), or
        -1 when feature extraction raises.

    NOTE(review): -1 is not guaranteed to be lower than every value
    decision_function can return, so a cluster whose features could not be
    computed may outrank one that scored below -1. The sentinel is kept
    because callers may depend on it -- confirm before changing.
    """
    try:
        features = getFeatures(cluster, input)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are no longer turned into a score.
        return -1
    return classifier.decision_function([features])[0]
def LearnWeights(data):
    """Train a linear SVM on labelled feature data and return it.

    The regularisation strength C is chosen by a 4-fold cross-validated grid
    search over 10 log-spaced values between 1e-10 and 1; the selected
    parameters are printed, and a LinearSVC with that C is then fitted on
    the full data set.

    Args:
        data: indexable pair where data[0] holds the feature rows and
            data[1] the matching labels (the shape CreateIdealMatchings
            returns).

    Returns:
        The fitted sklearn.svm.LinearSVC instance.
    """
    from sklearn import svm
    try:
        # Current home of GridSearchCV.
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.grid_search import GridSearchCV

    X = data[0]
    Y = data[1]
    clf = svm.LinearSVC(random_state=0)
    C_s = np.logspace(-10, 0, 10)
    param_grid = [{'C': C_s}]
    opt_clf = GridSearchCV(clf, param_grid, cv=4)
    opt_clf.fit(X, Y)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(opt_clf.best_params_)
    # Refit the original estimator with the winning C and return that
    # object rather than the search wrapper.
    clf.C = opt_clf.best_params_['C']
    clf.fit(X, Y)
    return clf
def GetBestOutput(clusters, input, classifier):
    """Run the highest-scoring cluster's program on `input`.

    Each cluster is scored with MatchScore. The cluster with the greatest
    score strictly above -INFI is chosen, and on ties the earliest cluster
    wins.

    Args:
        clusters: sequence of candidate clusters.
        input: the input item to transform.
        classifier: classifier forwarded to MatchScore.

    Returns:
        The result of execute() on the chosen cluster, or None when no
        cluster scores above -INFI (including when `clusters` is empty) or
        when execution raises, in which case "Program failed" is printed.
    """
    bestScore = -INFI
    best = -1
    for i, cluster in enumerate(clusters):
        newScore = MatchScore(cluster, input, classifier)
        # Strict comparison keeps the first cluster among equal scores.
        if newScore > bestScore:
            bestScore = newScore
            best = i
    if best < 0:
        return None
    try:
        return execute(clusters[best], input)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are no longer reported as a program failure.
        print("Program failed")
        return None