Skip to content

Commit

Permalink
Update with training info
Browse files Browse the repository at this point in the history
  • Loading branch information
Erotemic committed Oct 15, 2023
1 parent fae2795 commit 15c01ad
Show file tree
Hide file tree
Showing 7 changed files with 626 additions and 163 deletions.
193 changes: 117 additions & 76 deletions README.rst

Large diffs are not rendered by default.

90 changes: 90 additions & 0 deletions make_splits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
def make_splits():
    """
    Build train/validation kwcoco splits for the shitspotter dataset.

    Only images with at least one annotation are considered. Images are
    grouped into one "video" per capture year (a hack so video-oriented
    tooling can consume the dataset), then the earliest year becomes the
    validation split and all later years become the train split. Both
    splits are written next to the source kwcoco bundle, and stable
    ``train.kwcoco.zip`` / ``vali.kwcoco.zip`` symlinks are refreshed.

    Side effects:
        Writes two ``.kwcoco.zip`` files and two symlinks into the
        dataset bundle directory.
    """
    import shitspotter
    import kwcoco
    coco_fpath = shitspotter.util.find_shit_coco_fpath()
    dset = kwcoco.CocoDataset(coco_fpath)

    # Restrict the splits to images that actually have annotations.
    gids_with_annots = [gid for gid, aids in dset.index.gid_to_aids.items() if len(aids) > 0]
    images_with_annots = dset.images(gids_with_annots)

    import ubelt as ub
    from kwutil import util_time
    datetimes = list(map(util_time.coerce_datetime, images_with_annots.lookup('datetime', None)))
    year_to_gids = ub.group_items(images_with_annots, [d.year for d in datetimes])

    # Group images into per-year videos (idempotent: reuse existing videos).
    for year, gids in year_to_gids.items():

        video_name = f'video_{year}'
        if video_name not in dset.index.name_to_video:
            video_id = dset.add_video(name=video_name)
        else:
            video_id = dset.index.name_to_video[video_name]['id']

        video = dset.index.videos[video_id]

        video_images = dset.images(gids)

        for idx, img in enumerate(video_images.objs):
            img['frame_index'] = idx
            img['video_id'] = video_id
            img['sensor_coarse'] = 'phone'
            img['datetime_captured'] = img['datetime']
            img['channels'] = 'red|green|blue'

        # hack: assume every frame in the video shares the dimensions of
        # the last image seen in the loop above.
        video['width'] = img['width']
        video['height'] = img['height']

    dset._build_index()
    dset.conform()

    # The earliest year is validation; every later year is train.
    # (A previous revision also built year<=2020 splits here, but that
    # result was dead code, immediately overwritten by this grouping.)
    groups = [g for k, g in sorted(year_to_gids.items())]
    train_gids = list(ub.flatten(groups[1:]))
    vali_gids = list(ub.flatten(groups[:1]))

    train_split = dset.subset(train_gids)
    vali_split = dset.subset(vali_gids)

    def build_code(coco_dset):
        # Short deterministic tag: image count plus a hash prefix, so
        # filenames change whenever the split contents change.
        hashid = coco_dset._build_hashid()[0:8]
        return f'imgs{coco_dset.n_images}_{hashid}'

    bundle_dpath = ub.Path(dset.fpath).parent

    fname = ('vali_' + build_code(vali_split) + '.kwcoco.zip')
    vali_split.fpath = bundle_dpath / fname

    fname = ('train_' + build_code(train_split) + '.kwcoco.zip')
    train_split.fpath = bundle_dpath / fname

    print(f'vali_split.fpath={vali_split.fpath}')
    print(f'train_split.fpath={train_split.fpath}')

    train_split.conform()
    vali_split.conform()

    vali_split.dump()
    train_split.dump()

    # Refresh stable names so downstream configs do not need the hashid.
    ub.symlink(train_split.fpath, link_path=train_split.fpath.parent / 'train.kwcoco.zip', overwrite=True, verbose=3)
    ub.symlink(vali_split.fpath, link_path=vali_split.fpath.parent / 'vali.kwcoco.zip', overwrite=True, verbose=3)

    # See ~/code/ndsampler/train.sh

if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/shitspotter/make_splits.py
    """
    # Script entry point: builds and writes the train/vali kwcoco splits.
    make_splits()
1 change: 1 addition & 0 deletions shitspotter/cid_revisions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ bafybeiflkm37altah2ey2jxko7kngquwfugyo4cl36y7xjf7o2lbrgucbi
bafybeiczi4pn4na2iw7c66bpbf5rdr3ua3grp2qvjgrmnuzqabjjim4o2q
bafybeiczi4pn4na2iw7c66bpbf5rdr3ua3grp2qvjgrmnuzqabjjim4o2q
bafybeieahblb6aafomi72gnheu3ihom7nobdad4t6jcrrwhd5eb3wxkrgy
bafybeief7tmoarwmd26b2petx7crtvdnz6ucccek5wpwxwdvfydanfukna
139 changes: 137 additions & 2 deletions shitspotter/gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def main():
all_fpaths = []
change_point = dateutil.parser.parse('2021-05-11T120000')
walk_prog = ub.ProgIter(desc='walking')

extensions = set()

block_extensions = ('.mp4', '.json')

with walk_prog:
for r, ds, fs in os.walk(dpath, followlinks=True):
walk_prog.step()
Expand All @@ -48,11 +53,18 @@ def main():
for fname in fs:
gpath = join(r, fname)
all_fpaths.append(gpath)
if fname.endswith('.mp4'):
if fname.endswith(block_extensions):
continue
if fname in seen:
print('SEEN fname = {!r}'.format(fname))
continue

ext = fname.split('.')[-1]

if ext == 'shitspotter':
raise Exception

extensions.add(ext)
seen.add(fname)
rows.append({
'gpath': gpath,
Expand Down Expand Up @@ -143,7 +155,10 @@ def main():
raise
# TODO: exif 'OffsetTime': '-05:00',
row['datetime'] = dt.isoformat()
# exif_ori = exif.get('Orientation', None)

exif_ori = exif.get('Orientation', None)
row['exif_ori'] = exif_ori

# print('exif_ori = {!r}'.format(exif_ori))
geos_point = exif.get('GPSInfo', None)
if geos_point is not None and 'GPSLatitude' in geos_point:
Expand Down Expand Up @@ -178,11 +193,131 @@ def main():
coco_dset._ensure_json_serializable()
print('coco_dset.fpath = {!r}'.format(coco_dset.fpath))
coco_dset.reroot(absolute=False)
coco_dset.clear_annotations()

ADD_LABELME_ANNOTS = 1
if ADD_LABELME_ANNOTS:
import json
import kwimage
json_fpaths = sorted((dpath / 'assets').glob('*/*.json'))
for fpath in ub.ProgIter(json_fpaths):

if True:
# Fixup labelme json files
# Remove image data, fix bad labels
labelme_data = json.loads(fpath.read_text())
needs_write = 0
if labelme_data.get('imageData', None) is not None:
labelme_data['imageData'] = None
needs_write = 1

for shape in labelme_data['shapes']:
if shape['label'] == 'poop;':
shape['label'] = 'poop'

if needs_write:
fpath.write_text(json.dumps(labelme_data))

labelme_data = json.loads(fpath.read_text())
imginfo, annsinfo = labelme_to_coco_structure(labelme_data)
image_name = imginfo['file_name'].rsplit('.', 1)[0]
img = coco_dset.index.name_to_img[image_name]

# Construct the inverted exif transform
# (From exif space -> raw space)
rot_ccw = 0
flip_axis = None
if img['exif_ori'] == 8:
rot_ccw = 3
elif img['exif_ori'] == 3:
rot_ccw = 2
elif img['exif_ori'] == 6:
rot_ccw = 1
elif img['exif_ori'] == 7:
flip_axis = 1
rot_ccw = 3
elif img['exif_ori'] == 4:
flip_axis = 1
rot_ccw = 2
elif img['exif_ori'] == 5:
flip_axis = 1
rot_ccw = 1
exif_canvas_dsize = (labelme_data['imageWidth'], labelme_data['imageHeight'])
inv_exif = kwimage.Affine.fliprot(
flip_axis=flip_axis, rot_k=rot_ccw,
canvas_dsize=exif_canvas_dsize
)

for ann in annsinfo:
ann = ann.copy()
poly = kwimage.Polygon.from_coco(ann['segmentation'])

if not inv_exif.isclose_identity():
# if img['id'] not in {0}:
# raise Exception(img['id'])
# LabelMe Polygons are annotated in EXIF space, but
# we need them in raw space for kwcoco.
poly = poly.warp(inv_exif)

ann['segmentation'] = poly.to_coco(style='new')
ann['bbox'] = poly.box().quantize().to_coco()

catname = ann.pop('category_name')
cid = coco_dset.ensure_category(catname)
ann['category_id'] = cid
ann['image_id'] = img['id']
coco_dset.add_annotation(**ann)

if 0:
import kwplot
kwplot.autompl(recheck=1, force='QtAgg')
if not inv_exif.isclose_identity():
coco_dset.show_image(img['id'])
if img['id'] not in {0, 1575, 7, 1554}:
raise Exception(img['id'])
#
coco_dset.dump(coco_dset.fpath, newlines=True)


def labelme_to_coco_structure(labelme_data):
    """
    Translate one parsed LabelMe JSON dict into COCO-style structures.

    Args:
        labelme_data (dict): contents of a LabelMe annotation file.

    Returns:
        Tuple[dict, List[dict]]: an image-info dict and a list of
        annotation dicts. Each annotation carries a 'category_name'
        (not yet resolved to a category id) plus 'bbox'/'segmentation'.

    Raises:
        NotImplementedError: for LabelMe features this converter does not
            handle (group ids, descriptions, non-polygon shapes, flags).
    """
    import kwimage
    import numpy as np
    image_info = {
        'file_name': labelme_data['imagePath'],
        'width': labelme_data['imageWidth'],
        'height': labelme_data['imageHeight'],
    }
    annotations = []
    for shape in labelme_data['shapes']:
        points = shape['points']

        # Guard clauses: bail loudly on any LabelMe feature we cannot map.
        if shape['group_id'] is not None:
            raise NotImplementedError('groupid')
        if shape['description']:
            raise NotImplementedError('desc')
        shape_type = shape['shape_type']
        if shape_type != 'polygon':
            raise NotImplementedError(shape_type)
        if shape['flags']:
            raise NotImplementedError('flags')

        polygon = kwimage.Polygon.coerce(np.array(points))
        annotations.append({
            'category_name': shape['label'],
            'bbox': polygon.box().quantize().to_coco(),
            'segmentation': polygon.to_coco(style='new'),
        })

    return image_info, annotations


if __name__ == '__main__':
"""
CommandLine:
Expand Down
30 changes: 22 additions & 8 deletions shitspotter/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,17 @@ def autofind_pair_hueristic(coco_dset=None):
# from vtool_ibeis.matching import VSONE_FEAT_CONFIG

image_df = pd.DataFrame(coco_dset.dataset['images'])
ordered_gids = image_df.sort_values('datetime').id.tolist()

has_annots = [len(aids) > 0 for aids in coco_dset.images(image_df['id']).aids]
image_df['has_annots'] = has_annots
image_df = image_df.sort_values('datetime')

ordered_gids = image_df.id.tolist()
feat_cfg = {
'rotation_invariance': True,
'affine_invariance': True,
}
image_df = image_df.set_index('id', drop=False)

# Fails on 31, 32

Expand Down Expand Up @@ -94,8 +100,8 @@ def matchable_image(gid):
pair = (coco_img1['name'], coco_img2['name'])
key = ub.urepr(pair, compact=1)
if key not in existing_keys:
dt1 = dateutil.parser.parse(coco_img1['datetime'])
dt2 = dateutil.parser.parse(coco_img2['datetime'])
dt1 = coco_img1.datetime
dt2 = coco_img2.datetime
delta = dt1 - dt2
delta_seconds = delta.total_seconds()
if delta_seconds < compare_time_thresh:
Expand Down Expand Up @@ -137,7 +143,7 @@ def matchable_image(gid):
key = ub.urepr((match['name1'], match['name2']), compact=1)
image_matches[key] = match

# Save the match table
# Save the match table shelf
image_matches.sync()

# coco_dset.dump(coco_dset.fpath, newlines=True)
Expand Down Expand Up @@ -197,6 +203,8 @@ def matchable_image(gid):
good_pairwise_idxs.append(idx + 1)
idx += 2
else:
# import xdev
# xdev.embed()
bad_pairwise_items += 1
idx += 1

Expand Down Expand Up @@ -264,6 +272,11 @@ def matchable_image(gid):
total_imgs = len(coco_dset.imgs)
print(f'total_images = {total_imgs}')

num_images_with_annots = sum([bool(a) for a in coco_dset.images().annots])
num_annots = coco_dset.n_annots
print('num_images_with_annots = {}'.format(ub.urepr(num_images_with_annots, nl=1)))
print('num_annots = {}'.format(ub.urepr(num_annots, nl=1)))

if 1:
import datetime as datetime_mod
today = datetime_mod.datetime.now().date()
Expand All @@ -272,12 +285,13 @@ def matchable_image(gid):
'# Images': total_imgs,
'# Estimated Groups': total_estimated_number_of_tups,
'# Registered Groups': total_matchable_tups,
'# Annotated Images': num_images_with_annots,
}
print('New row for README')
print('| {:<12s}| {:<8s} | {:<18s} | {:<22s}|'.format(*list(row.keys())))
print('+=============+==========+=====================+=======================+')
print('| {:<12s}| {:<7d} | ~{:<17d} | {:<22d}|'.format(*list(row.values())))
print('+-------------+----------+---------------------+-----------------------+')
print('| {:<12s}| {:<8s} | {:<18s} | {:<22s}| {:<22s}|'.format(*list(row.keys())))
print('+=============+==========+=====================+=======================+=======================+')
print('| {:<12s}| {:<7d} | ~{:<17d} | {:<22d}| {:<22d}|'.format(*list(row.values())))
print('+-------------+----------+---------------------+-----------------------+-----------------------+')
# import tabulate
# import pandas as pd
# df = pd.DataFrame([row])
Expand Down
Loading

0 comments on commit 15c01ad

Please sign in to comment.