-
Notifications
You must be signed in to change notification settings - Fork 13
/
ficeval.py
91 lines (79 loc) · 2.97 KB
/
ficeval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Wrapper for evaluation on CIDEr, ROUGE_L, METEOR and Bleu_N
using coco-caption repo https://github.com/tylin/coco-caption
class FICScorer is adapted from class COCOScorer in https://github.com/yaoli/arctic-capgen-vid
"""
import pdb
from evalcap.bleu.bleu import Bleu
from evalcap.rouge.rouge import Rouge
from evalcap.cider.cider import Cider
from evalcap.meteor.meteor import Meteor
from evalcap.tokenizer.ptbtokenizer import PTBTokenizer
# NOTE: an earlier comment here announced a context manager to suppress stdout
# and stderr; no such context manager is defined in this file.
class FICScorer(object):
    """Compute Bleu_1..4, METEOR, ROUGE_L and CIDEr for a set of captions.

    Results accumulate on the instance:

    * ``eval`` maps a metric name (e.g. ``"Bleu_1"``, ``"CIDEr"``) to the
      overall score returned by the corresponding scorer.
    * ``imgToEval`` maps each ID to a dict holding ``"image_id"`` plus one
      entry per metric name.

    Neither dict is cleared between calls to :meth:`score`, so repeated calls
    on the same instance overwrite/extend the earlier results.
    """

    def __init__(self):
        self.imgToEval = {}
        self.eval = {}
        print('init COCO-EVAL scorer')

    def score(self, GT, RES, IDs):
        """Tokenize and score ``RES`` against ``GT`` for the given ``IDs``.

        Args:
            GT: mapping from ID to the reference entries for that ID.
            RES: mapping from ID to the generated entries for that ID.
            IDs: iterable of IDs to evaluate; each must be a key of both
                ``GT`` and ``RES`` (a missing key raises ``KeyError``).
                NOTE(review): the value format is whatever
                ``PTBTokenizer.tokenize`` expects - confirm against evalcap.

        Returns:
            ``self.eval``, the dict of metric name -> overall score.
        """
        # Restrict both mappings to the requested IDs before tokenizing.
        gts = {ID: GT[ID] for ID in IDs}
        res = {ID: RES[ID] for ID in IDs}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            self._record_scores(score, scores, IDs, method)

        return self.eval

    def _record_scores(self, score, scores, IDs, method):
        """Store one scorer's output and print its overall value(s).

        When ``method`` is a list/tuple of names (the Bleu entry above),
        ``score`` and ``scores`` are iterated in lockstep with it, one entry
        per name; otherwise they are stored under the single name ``method``.

        NOTE(review): per-ID values are paired with ``IDs`` positionally, so
        this presumes the scorer emits them in the same order as ``IDs`` (and
        that ``IDs`` has no duplicates) - confirm against the evalcap scorers.
        """
        if isinstance(method, (list, tuple)):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.3f" % (method, score))

    def setEval(self, score, method):
        """Record the overall ``score`` under the metric name ``method``."""
        self.eval[method] = score

    def setImgToEvalImgs(self, scores, imgIds, method):
        """Record per-ID ``scores`` for ``method``, pairing them positionally.

        ``imgIds`` and ``scores`` are zipped, so any surplus entries in the
        longer of the two are silently ignored.
        """
        for imgId, score in zip(imgIds, scores):
            # First metric seen for this ID: create its record.
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {}
                self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId][method] = score
def score(ref, sample):
    """Compute overall Bleu_1..4, ROUGE_L and CIDEr for ``sample`` vs ``ref``.

    Unlike ``FICScorer.score``, this function does no tokenization, does not
    run METEOR, and discards the per-item scores.

    Args:
        ref: dict of references, passed unchanged to each scorer.
        sample: dict of generated outputs, passed unchanged to each scorer.
            NOTE(review): presumably both are already tokenized and share
            the same keys - confirm against the caller.

    Returns:
        dict mapping each metric name to the overall score from its scorer.
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        print('computing %s score with FIC-EVAL...' % (scorer.method()))
        # Only the overall value is kept; the per-item scores are unused here.
        overall, _ = scorer.compute_score(ref, sample)
        if isinstance(overall, (list, tuple)):
            # One overall value per metric name (the Bleu_1..Bleu_4 entry).
            final_scores.update(zip(method, overall))
        else:
            final_scores[method] = overall
    return final_scores