eval.py
import os

import numpy as np
from docopt import docopt
from scipy.stats import spearmanr


def get_ys(model_answers, true_answers):
    """
    :param model_answers: path to tab-separated answer file (lemma + "\t" + score)
    :param true_answers: path to tab-separated gold answer file (lemma + "\t" + score)
    :return: a numpy array of the model scores and one of the true scores
    """
    y_hat_tmp = {}
    errors = 0
    with open(model_answers, 'r', encoding='utf-8') as f_in:
        for line in f_in:
            lemma, score = line.strip().split('\t')
            if score == 'nan':
                errors += 1
            # NaN predictions are counted but kept; eval_task2 drops them
            # later via nan_policy='omit'.
            y_hat_tmp[lemma] = score
    if errors:
        print('Found %d NaN predictions' % errors)
    y_hat, y = [], []
    with open(true_answers, 'r', encoding='utf-8') as f_in:
        for line in f_in:
            lemma, score = line.strip().split('\t')
            y.append(float(score))
            y_hat.append(float(y_hat_tmp[lemma]))
    return np.array(y_hat), np.array(y)
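
# Both answer files hold one "lemma<TAB>score" pair per line. An illustrative
# (hypothetical) Task 1 file might look like:
#
#     walk<TAB>0
#     attack<TAB>1
#
# Note that every lemma in the gold file must also appear in the model file,
# otherwise get_ys raises a KeyError.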


def eval_task1(model_answers, true_answers):
    """
    Computes accuracy against the true binary labels as annotated by humans.
    :param model_answers: path to tab-separated answer file (lemma + "\t" + score)
    :param true_answers: path to tab-separated gold answer file (lemma + "\t" + score)
    :return: binary classification accuracy
    """
    y_hat, y = get_ys(model_answers, true_answers)
    accuracy = np.sum(np.equal(y_hat, y)) / len(y)
    return accuracy
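
# Equivalent to the mean of exact matches: e.g. y_hat = [0., 1., 1.] against
# y = [0., 1., 0.] matches on two of three lemmas, giving an accuracy of 2/3.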


def eval_task2(model_answers, true_answers):
    """
    Computes Spearman's correlation coefficient against the true rank as annotated by humans.
    :param model_answers: path to tab-separated answer file (lemma + "\t" + score)
    :param true_answers: path to tab-separated gold answer file (lemma + "\t" + score)
    :return: (Spearman's correlation coefficient, p-value)
    """
    y_hat, y = get_ys(model_answers, true_answers)
    r, p = spearmanr(y_hat, y, nan_policy='omit')
    return r, p
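
# A minimal sketch (illustrative values) of what eval_task2 computes:
#
#     >>> from scipy.stats import spearmanr
#     >>> r, p = spearmanr([1.0, 2.0, 3.0], [1.0, 3.0, 2.0])
#     >>> round(r, 2)
#     0.5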


def main():
    """
    Evaluate lexical semantic change detection results.
    """
    # Get the arguments
    args = docopt("""Evaluate lexical semantic change detection results.

    Usage:
        eval.py <modelAnsPath1> <modelAnsPath2> <trueAnsPath1> <trueAnsPath2>

    Arguments:
        <modelAnsPath1> = path to tab-separated answer file for Task 1 (lemma + "\t" + binary score)
        <modelAnsPath2> = path to tab-separated answer file for Task 2 (lemma + "\t" + graded score)
        <trueAnsPath1> = path to tab-separated gold answer file for Task 1 (lemma + "\t" + binary score)
        <trueAnsPath2> = path to tab-separated gold answer file for Task 2 (lemma + "\t" + graded score)
    """)

    modelAnsPath1 = args['<modelAnsPath1>']
    modelAnsPath2 = args['<modelAnsPath2>']
    trueAnsPath1 = args['<trueAnsPath1>']
    trueAnsPath2 = args['<trueAnsPath2>']

    print("Task\tScore\tp-value")
    if os.path.isfile(modelAnsPath1):
        acc = eval_task1(modelAnsPath1, trueAnsPath1)
        print(f"Task 1\t{acc:.3f}\t")
    else:
        print(f"Task 1 predictions not found: {modelAnsPath1}")
    if os.path.isfile(modelAnsPath2):
        r, p = eval_task2(modelAnsPath2, trueAnsPath2)
        print(f"Task 2\t{r:.3f}\t{p:.3f}")
    else:
        print(f"Task 2 predictions not found: {modelAnsPath2}")


if __name__ == '__main__':
    main()
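
# Example invocation (hypothetical file names):
#
#     python eval.py answers_task1.tsv answers_task2.tsv truth_task1.tsv truth_task2.tsv
#
# The script prints one tab-separated row per task: the task name, the score
# (accuracy for Task 1, Spearman's rho for Task 2), and a p-value for Task 2.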