-
Notifications
You must be signed in to change notification settings - Fork 2
/
coco_stats.py
28 lines (20 loc) · 1.22 KB
/
coco_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from methods import parallel_sentences_from_file, parallel_stats, load_json, save_json, sentence_stats
# Input annotation files and the output files for the computed statistics.
TRAIN_PATH = './Data/COCO/Processed/tokenized_train2014.json'
VAL_PATH = './Data/COCO/Processed/tagged_val2014.json'
TRAIN_STATS_PATH = './Data/COCO/Processed/train_stats.json'
VAL_STATS_PATH = './Data/COCO/Processed/val_stats.json'

# Both splits are read with the same options: don't load tags, and
# lowercase all descriptions.
LOAD_OPTIONS = {'tagged': False, 'lower': True}

train = parallel_sentences_from_file(TRAIN_PATH, **LOAD_OPTIONS)
val = parallel_sentences_from_file(VAL_PATH, **LOAD_OPTIONS)

# Compute the per-split stats for the train and val data.
train_stats = parallel_stats(train)
val_stats = parallel_stats(val)

# Extra stats: these are computed from the 'caption' field of every entry
# under 'annotations', read straight from the same two JSON files.
train_data = load_json(TRAIN_PATH)
train_descriptions = [
    annotation['caption'] for annotation in train_data['annotations']
]
val_data = load_json(VAL_PATH)
val_descriptions = [
    annotation['caption'] for annotation in val_data['annotations']
]
extra_stats = sentence_stats(train_descriptions, val_descriptions)

# The extra stats are merged into the validation stats only.
# NOTE(review): train_stats does not receive them -- confirm this is intended.
val_stats.update(extra_stats)

# Save both stats objects to file.
save_json(train_stats, TRAIN_STATS_PATH)
save_json(val_stats, VAL_STATS_PATH)