-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.py
110 lines (88 loc) · 3.27 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import functools
import json
import sys
from collections import defaultdict

import pandas as pd
def to_pandas(fun):
    """Decorate a single-dataset statistic so it compares train against val.

    ``fun`` is called as ``fun(content, **kwargs)`` and must return a mapping
    from a key to a number.

    The returned wrapper is called as ``f(train, val, perc=True, **kwargs)``.
    It runs ``fun`` once on ``train`` and once on ``val``, combines both
    results with ``merge_dict`` (a key missing on one side counts as 0) and
    returns a ``pandas.DataFrame`` indexed by key, with the columns
    ``'train'`` and ``'val'``, sorted by index.

    When ``perc`` is truthy every column is divided by its own sum, so the
    values are fractions of the column total instead of absolute numbers.
    """
    # functools.wraps keeps the decorated function's __name__ and __doc__;
    # without it every decorated statistic would show up as "f".
    @functools.wraps(fun)
    def f(train, val, perc=True, **kwargs):
        to_train = fun(train, **kwargs)
        to_val = fun(val, **kwargs)
        merged = merge_dict(to_train, to_val)
        data = pd.DataFrame.from_dict(
            merged, orient='index', columns=['train', 'val'])
        if perc:
            # data.sum() holds one total per column; the division is aligned
            # on the column labels, so each column is scaled by its own total.
            data = data / data.sum()
        return data.sort_index()
    return f
def cat_id2name(content):
    """Build a lookup table from category id to category name.

    Reads ``content['categories']``, a sequence of dicts that each carry an
    ``'id'`` and a ``'name'`` entry. If an id occurs more than once, the
    last name wins.

    The result is a ``defaultdict(int)``: indexing it with an id that is not
    listed returns 0 (and stores that 0 under the id).
    """
    id_to_name = defaultdict(int)
    id_to_name.update(
        (category['id'], category['name'])
        for category in content['categories']
    )
    return id_to_name
def merge_dict(d1, d2):
    """Pair up the values of two mappings key by key.

    Returns a new dict containing every key found in ``d1`` or ``d2``. Each
    value is the two-element list ``[value in d1, value in d2]``, where a
    key that is absent from one mapping contributes 0 for that side.
    """
    all_keys = set(d1) | set(d2)
    return {key: [d1.get(key, 0), d2.get(key, 0)] for key in all_keys}
def filter_ann_by_image_id(content, img_id):
    """Return the annotations that belong to one image.

    Scans ``content['annotations']`` and keeps, in their original order,
    the entries whose ``'image_id'`` equals ``img_id``. The result is a new
    list; it is empty when nothing matches.
    """
    matching = []
    for annotation in content['annotations']:
        if annotation['image_id'] == img_id:
            matching.append(annotation)
    return matching
@to_pandas
def instances_per_cat(content):
    """Count the annotations of each category.

    Every entry of ``content['annotations']`` is looked up by its
    ``'category_id'`` in the table built by ``cat_id2name`` and tallied
    under the resulting name. Returns a ``defaultdict(int)`` mapping
    category name to number of annotations.

    Because of the ``to_pandas`` decorator, callers pass a train and a val
    dataset and receive a DataFrame with one column per dataset.
    """
    id_to_name = cat_id2name(content)
    per_category = defaultdict(int)
    names = (id_to_name[ann['category_id']] for ann in content['annotations'])
    for name in names:
        per_category[name] += 1
    return per_category
@to_pandas
def count_instances_per_img(content):
    """Histogram of how many annotations each image has.

    For every entry of ``content['images']`` the number of entries in
    ``content['annotations']`` whose ``'image_id'`` equals the image's
    ``'id'`` is determined. Returns a ``defaultdict(int)`` mapping that
    number to how many images have it; images without any annotation are
    counted under 0.

    Because of the ``to_pandas`` decorator, callers pass a train and a val
    dataset and receive a DataFrame with one column per dataset.
    """
    # Tally the annotations per image id in a single pass. Rescanning the
    # whole annotation list for every image would cost
    # len(images) * len(annotations) comparisons.
    anns_per_image = defaultdict(int)
    for ann in content['annotations']:
        anns_per_image[ann['image_id']] += 1

    count = defaultdict(int)
    for img in content['images']:
        # .get() keeps images without annotations in bucket 0 without
        # inserting new keys into the tally.
        count[anns_per_image.get(img['id'], 0)] += 1
    return count
@to_pandas
def count_cats_per_img(content):
    """Histogram of how many distinct categories each image contains.

    For every entry of ``content['images']`` the distinct ``'category_id'``
    values among the annotations whose ``'image_id'`` equals the image's
    ``'id'`` are counted. Returns a ``defaultdict(int)`` mapping that number
    to how many images have it; images without any annotation are counted
    under 0.

    Because of the ``to_pandas`` decorator, callers pass a train and a val
    dataset and receive a DataFrame with one column per dataset.
    """
    # Collect the category ids per image id in a single pass. Rescanning the
    # whole annotation list for every image would cost
    # len(images) * len(annotations) comparisons.
    cats_per_image = defaultdict(set)
    for ann in content['annotations']:
        cats_per_image[ann['image_id']].add(ann['category_id'])

    count = defaultdict(int)
    for img in content['images']:
        # .get() with an empty default puts unannotated images in bucket 0
        # without inserting new keys into the lookup table.
        count[len(cats_per_image.get(img['id'], ()))] += 1
    return count
@to_pandas
def instance_size(content, delta=0.1):
    """Histogram of annotation box area relative to the image area.

    For each annotation, ``bbox[2] * bbox[3]`` is divided by the
    ``width * height`` of the image it belongs to (presumably the box is in
    COCO ``[x, y, width, height]`` layout - confirm against the data). The
    fraction is assigned to a bucket of width ``delta``; the bucket is
    labelled by its upper edge, capped at 1.0 and formatted with three
    decimals (for example ``"0.100"``).

    Returns a ``defaultdict(int)`` mapping bucket label to the number of
    annotations in it. Annotations whose ``'image_id'`` matches no entry of
    ``content['images']`` are ignored.

    Because of the ``to_pandas`` decorator, callers pass a train and a val
    dataset and receive a DataFrame with one column per dataset; ``delta``
    is forwarded as a keyword argument.
    """
    # Group the annotations by image id in a single pass. Rescanning the
    # whole annotation list for every image would cost
    # len(images) * len(annotations) comparisons.
    anns_by_image = defaultdict(list)
    for ann in content['annotations']:
        anns_by_image[ann['image_id']].append(ann)

    count = defaultdict(int)
    for img in content['images']:
        anns = anns_by_image.get(img['id'])
        if not anns:
            continue
        # The image area is the same for all of the image's annotations.
        img_area = img['width'] * img['height']
        for ann in anns:
            perc_size = (ann['bbox'][2] * ann['bbox'][3]) / img_area
            # Upper edge of the delta-wide bucket; min() caps it at 1.0.
            idx = min(1., (perc_size // delta) * delta + delta)
            idx = "{:.3f}".format(idx)
            count[idx] += 1
    return count
# Command-line entry point: load a train and a val annotation file, compute
# the statistic selected by ``action`` and print it to stdout as CSV.
parser = argparse.ArgumentParser(description='COCO Subset')
parser.add_argument('train_json', help='Input COCO train annotation file')
parser.add_argument('val_json', help='Input COCO validation annotation file')
parser.add_argument('action', help='Action to perform', choices=[
    'instances_per_category', 'cats_per_img', 'instances_per_img',
    'instance_size'])
parser.add_argument('-d', dest='delta', default=0.05, type=float,
                    help='delta percentage')
parser.add_argument('-p', dest='perc', default=False, action='store_true',
                    help='show percentages instead of absolute values')

args = parser.parse_args(sys.argv[1:])

# Decode the JSON files explicitly as UTF-8 instead of relying on the
# platform's locale encoding, which may not be UTF-8 on every system.
with open(args.train_json, 'r', encoding='utf-8') as f:
    train = json.load(f)

with open(args.val_json, 'r', encoding='utf-8') as f:
    val = json.load(f)

# args.perc is passed positionally as the ``perc`` argument of the wrapper
# created by ``to_pandas``; only instance_size takes the extra ``delta``.
if args.action == 'instances_per_category':
    print(instances_per_cat(train, val, args.perc).to_csv())
elif args.action == 'cats_per_img':
    print(count_cats_per_img(train, val, args.perc).to_csv())
elif args.action == 'instances_per_img':
    print(count_instances_per_img(train, val, args.perc).to_csv())
elif args.action == 'instance_size':
    print(instance_size(train, val, args.perc, delta=args.delta).to_csv())