diff --git a/configs/_base_/datasets/ubody3d.py b/configs/_base_/datasets/ubody3d.py new file mode 100644 index 0000000000..9242559ea1 --- /dev/null +++ b/configs/_base_/datasets/ubody3d.py @@ -0,0 +1,958 @@ +dataset_info = dict( + dataset_name='ubody3d', + paper_info=dict( + author='Jing Lin, Ailing Zeng, Haoqian Wang, Lei Zhang, Yu Li', + title='One-Stage 3D Whole-Body Mesh Recovery with Component Aware' + 'Transformer', + container='IEEE Computer Society Conference on Computer Vision and ' + 'Pattern Recognition (CVPR)', + year='2023', + homepage='https://github.com/IDEA-Research/OSX', + ), + keypoint_info={ + 0: + dict(name='Pelvis', id=0, color=[0, 255, 0], type='', swap=''), + 1: + dict( + name='L_Hip', id=1, color=[0, 255, 0], type='lower', swap='R_Hip'), + 2: + dict( + name='R_Hip', id=2, color=[0, 255, 0], type='lower', swap='L_Hip'), + 3: + dict( + name='L_Knee', + id=3, + color=[0, 255, 0], + type='lower', + swap='R_Knee'), + 4: + dict( + name='R_Knee', + id=4, + color=[0, 255, 0], + type='lower', + swap='L_Knee'), + 5: + dict( + name='L_Ankle', + id=5, + color=[0, 255, 0], + type='lower', + swap='R_Ankle'), + 6: + dict( + name='R_Ankle', + id=6, + color=[0, 255, 0], + type='lower', + swap='L_Ankle'), + 7: + dict(name='Neck', id=7, color=[0, 255, 0], type='upper', swap=''), + 8: + dict( + name='L_Shoulder', + id=8, + color=[0, 255, 0], + type='upper', + swap='R_Shoulder'), + 9: + dict( + name='R_Shoulder', + id=9, + color=[0, 255, 0], + type='upper', + swap='L_Shoulder'), + 10: + dict( + name='L_Elbow', + id=10, + color=[0, 255, 0], + type='upper', + swap='R_Elbow'), + 11: + dict( + name='R_Elbow', + id=11, + color=[0, 255, 0], + type='upper', + swap='L_Elbow'), + 12: + dict( + name='L_Wrist', + id=12, + color=[0, 255, 0], + type='upper', + swap='R_Wrist'), + 13: + dict( + name='R_Wrist', + id=13, + color=[0, 255, 0], + type='upper', + swap='L_Wrist'), + 14: + dict( + name='L_Big_toe', + id=14, + color=[0, 255, 0], + type='lower', + swap='R_Big_toe'), + 15: + dict( + name='L_Small_toe', + id=15, + color=[0, 255, 0], + type='lower', + swap='R_Small_toe'), + 16: + dict( + name='L_Heel', + id=16, + color=[0, 255, 0], + type='lower', + swap='R_Heel'), + 17: + dict( + name='R_Big_toe', + id=17, + color=[0, 255, 0], + type='lower', + swap='L_Big_toe'), + 18: + dict( + name='R_Small_toe', + id=18, + color=[0, 255, 0], + type='lower', + swap='L_Small_toe'), + 19: + dict( + name='R_Heel', + id=19, + color=[0, 255, 0], + type='lower', + swap='L_Heel'), + 20: + dict( + name='L_Ear', id=20, color=[0, 255, 0], type='upper', + swap='R_Ear'), + 21: + dict( + name='R_Ear', id=21, color=[0, 255, 0], type='upper', + swap='L_Ear'), + 22: + dict(name='L_Eye', id=22, color=[0, 255, 0], type='', swap='R_Eye'), + 23: + dict(name='R_Eye', id=23, color=[0, 255, 0], type='', swap='L_Eye'), + 24: + dict(name='Nose', id=24, color=[0, 255, 0], type='upper', swap=''), + 25: + dict( + name='L_Thumb_1', + id=25, + color=[255, 128, 0], + type='', + swap='R_Thumb_1'), + 26: + dict( + name='L_Thumb_2', + id=26, + color=[255, 128, 0], + type='', + swap='R_Thumb_2'), + 27: + dict( + name='L_Thumb_3', + id=27, + color=[255, 128, 0], + type='', + swap='R_Thumb_3'), + 28: + dict( + name='L_Thumb_4', + id=28, + color=[255, 128, 0], + type='', + swap='R_Thumb_4'), + 29: + dict( + name='L_Index_1', + id=29, + color=[255, 128, 0], + type='', + swap='R_Index_1'), + 30: + dict( + name='L_Index_2', + id=30, + color=[255, 128, 0], + type='', + swap='R_Index_2'), + 31: + dict( + name='L_Index_3', + id=31, + color=[255, 128, 0], + type='', + swap='R_Index_3'), + 32: + dict( + name='L_Index_4', + id=32, + color=[255, 128, 0], + type='', + swap='R_Index_4'), + 33: + dict( + name='L_Middle_1', + id=33, + color=[255, 128, 0], + type='', + swap='R_Middle_1'), + 34: + dict( + name='L_Middle_2', + id=34, + color=[255, 128, 0], + type='', + swap='R_Middle_2'), + 35: + dict( + name='L_Middle_3', + id=35, + color=[255, 128, 0], + type='', + swap='R_Middle_3'), + 36: + dict( + name='L_Middle_4', + id=36, + color=[255, 128, 0], + type='', + swap='R_Middle_4'), + 37: + dict( + name='L_Ring_1', + id=37, + color=[255, 128, 0], + type='', + swap='R_Ring_1'), + 38: + dict( + name='L_Ring_2', + id=38, + color=[255, 128, 0], + type='', + swap='R_Ring_2'), + 39: + dict( + name='L_Ring_3', + id=39, + color=[255, 128, 0], + type='', + swap='R_Ring_3'), + 40: + dict( + name='L_Ring_4', + id=40, + color=[255, 128, 0], + type='', + swap='R_Ring_4'), + 41: + dict( + name='L_Pinky_1', + id=41, + color=[255, 128, 0], + type='', + swap='R_Pinky_1'), + 42: + dict( + name='L_Pinky_2', + id=42, + color=[255, 128, 0], + type='', + swap='R_Pinky_2'), + 43: + dict( + name='L_Pinky_3', + id=43, + color=[255, 128, 0], + type='', + swap='R_Pinky_3'), + 44: + dict( + name='L_Pinky_4', + id=44, + color=[255, 128, 0], + type='', + swap='R_Pinky_4'), + 45: + dict( + name='R_Thumb_1', + id=45, + color=[255, 128, 0], + type='', + swap='L_Thumb_1'), + 46: + dict( + name='R_Thumb_2', + id=46, + color=[255, 128, 0], + type='', + swap='L_Thumb_2'), + 47: + dict( + name='R_Thumb_3', + id=47, + color=[255, 128, 0], + type='', + swap='L_Thumb_3'), + 48: + dict( + name='R_Thumb_4', + id=48, + color=[255, 128, 0], + type='', + swap='L_Thumb_4'), + 49: + dict( + name='R_Index_1', + id=49, + color=[255, 128, 0], + type='', + swap='L_Index_1'), + 50: + dict( + name='R_Index_2', + id=50, + color=[255, 128, 0], + type='', + swap='L_Index_2'), + 51: + dict( + name='R_Index_3', + id=51, + color=[255, 128, 0], + type='', + swap='L_Index_3'), + 52: + dict( + name='R_Index_4', + id=52, + color=[255, 128, 0], + type='', + swap='L_Index_4'), + 53: + dict( + name='R_Middle_1', + id=53, + color=[255, 128, 0], + type='', + swap='L_Middle_1'), + 54: + dict( + name='R_Middle_2', + id=54, + color=[255, 128, 0], + type='', + swap='L_Middle_2'), + 55: + dict( + name='R_Middle_3', + id=55, + color=[255, 128, 0], + type='', + swap='L_Middle_3'), + 56: + dict( + name='R_Middle_4', + id=56, + color=[255, 128, 0], + type='', + swap='L_Middle_4'), + 57: + dict( + name='R_Ring_1', + id=57, + color=[255, 128, 0], + type='', + swap='L_Ring_1'), + 58: + dict( + name='R_Ring_2', + id=58, + color=[255, 128, 0], + type='', + swap='L_Ring_2'), + 59: + dict( + name='R_Ring_3', + id=59, + color=[255, 128, 0], + type='', + swap='L_Ring_3'), + 60: + dict( + name='R_Ring_4', + id=60, + color=[255, 128, 0], + type='', + swap='L_Ring_4'), + 61: + dict( + name='R_Pinky_1', + id=61, + color=[255, 128, 0], + type='', + swap='L_Pinky_1'), + 62: + dict( + name='R_Pinky_2', + id=62, + color=[255, 128, 0], + type='', + swap='L_Pinky_2'), + 63: + dict( + name='R_Pinky_3', + id=63, + color=[255, 128, 0], + type='', + swap='L_Pinky_3'), + 64: + dict( + name='R_Pinky_4', + id=64, + color=[255, 128, 0], + type='', + swap='L_Pinky_4'), + 65: + dict(name='Face_1', id=65, color=[255, 255, 255], type='', swap=''), + 66: + dict(name='Face_2', id=66, color=[255, 255, 255], type='', swap=''), + 67: + dict( + name='Face_3', + id=67, + color=[255, 255, 255], + type='', + swap='Face_4'), + 68: + dict( + name='Face_4', + id=68, + color=[255, 255, 255], + type='', + swap='Face_3'), + 69: + dict( + name='Face_5', + id=69, + color=[255, 255, 255], + type='', + swap='Face_14'), + 70: + dict( + name='Face_6', + id=70, + color=[255, 255, 255], + type='', + swap='Face_13'), + 71: + dict( + name='Face_7', + id=71, + color=[255, 255, 255], + type='', + swap='Face_12'), + 72: + dict( + name='Face_8', + id=72, + color=[255, 255, 255], + type='', + swap='Face_11'), + 73: + dict( + name='Face_9', + id=73, + color=[255, 255, 255], + type='', + swap='Face_10'), + 74: + dict( + name='Face_10', + id=74, + color=[255, 255, 255], + type='', + swap='Face_9'), + 75: + dict( + name='Face_11', + id=75, + color=[255, 255, 255], + type='', + swap='Face_8'), + 76: + dict( + name='Face_12', + id=76, + color=[255, 255, 255], + type='', + swap='Face_7'), + 77: + dict( + name='Face_13', + id=77, + color=[255, 255, 255], + type='', + swap='Face_6'), + 78: + dict( + name='Face_14', + id=78, + color=[255, 255, 255], + type='', + swap='Face_5'), + 79: + dict(name='Face_15', id=79, color=[255, 255, 255], type='', swap=''), + 80: + dict(name='Face_16', id=80, color=[255, 255, 255], type='', swap=''), + 81: + dict(name='Face_17', id=81, color=[255, 255, 255], type='', swap=''), + 82: + dict(name='Face_18', id=82, color=[255, 255, 255], type='', swap=''), + 83: + dict( + name='Face_19', + id=83, + color=[255, 255, 255], + type='', + swap='Face_23'), + 84: + dict( + name='Face_20', + id=84, + color=[255, 255, 255], + type='', + swap='Face_22'), + 85: + dict(name='Face_21', id=85, color=[255, 255, 255], type='', swap=''), + 86: + dict( + name='Face_22', + id=86, + color=[255, 255, 255], + type='', + swap='Face_20'), + 87: + dict( + name='Face_23', + id=87, + color=[255, 255, 255], + type='', + swap='Face_19'), + 88: + dict( + name='Face_24', + id=88, + color=[255, 255, 255], + type='', + swap='Face_33'), + 89: + dict( + name='Face_25', + id=89, + color=[255, 255, 255], + type='', + swap='Face_32'), + 90: + dict( + name='Face_26', + id=90, + color=[255, 255, 255], + type='', + swap='Face_31'), + 91: + dict( + name='Face_27', + id=91, + color=[255, 255, 255], + type='', + swap='Face_30'), + 92: + dict( + name='Face_28', + id=92, + color=[255, 255, 255], + type='', + swap='Face_35'), + 93: + dict( + name='Face_29', + id=93, + color=[255, 255, 255], + type='', + swap='Face_34'), + 94: + dict( + name='Face_30', + id=94, + color=[255, 255, 255], + type='', + swap='Face_27'), + 95: + dict( + name='Face_31', + id=95, + color=[255, 255, 255], + type='', + swap='Face_26'), + 96: + dict( + name='Face_32', + id=96, + color=[255, 255, 255], + type='', + swap='Face_25'), + 97: + dict( + name='Face_33', + id=97, + color=[255, 255, 255], + type='', + swap='Face_24'), + 98: + dict( + name='Face_34', + id=98, + color=[255, 255, 255], + type='', + swap='Face_29'), + 99: + dict( + name='Face_35', + id=99, + color=[255, 255, 255], + type='', + swap='Face_28'), + 100: + dict( + name='Face_36', + id=100, + color=[255, 255, 255], + type='', + swap='Face_42'), + 101: + dict( + name='Face_37', + id=101, + color=[255, 255, 255], + type='', + swap='Face_41'), + 102: + dict( + name='Face_38', + id=102, + color=[255, 255, 255], + type='', + swap='Face_40'), + 103: + dict(name='Face_39', id=103, color=[255, 255, 255], type='', swap=''), + 104: + dict( + name='Face_40', + id=104, + color=[255, 255, 255], + type='', + swap='Face_38'), + 105: + dict( + name='Face_41', + id=105, + color=[255, 255, 255], + type='', + swap='Face_37'), + 106: + dict( + name='Face_42', + id=106, + color=[255, 255, 255], + type='', + swap='Face_36'), + 107: + dict( + name='Face_43', + id=107, + color=[255, 255, 255], + type='', + swap='Face_47'), + 108: + dict( + name='Face_44', + id=108, + color=[255, 255, 255], + type='', + swap='Face_46'), + 109: + dict(name='Face_45', id=109, color=[255, 255, 255], type='', swap=''), + 110: + dict( + name='Face_46', + id=110, + color=[255, 255, 255], + type='', + swap='Face_44'), + 111: + dict( + name='Face_47', + id=111, + color=[255, 255, 255], + type='', + swap='Face_43'), + 112: + dict( + name='Face_48', + id=112, + color=[255, 255, 255], + type='', + swap='Face_52'), + 113: + dict( + name='Face_49', + id=113, + color=[255, 255, 255], + type='', + swap='Face_51'), + 114: + dict(name='Face_50', id=114, color=[255, 255, 255], type='', swap=''), + 115: + dict( + name='Face_51', + id=115, + color=[255, 255, 255], + type='', + swap='Face_49'), + 116: + dict( + name='Face_52', + id=116, + color=[255, 255, 255], + type='', + swap='Face_48'), + 117: + dict( + name='Face_53', + id=117, + color=[255, 255, 255], + type='', + swap='Face_55'), + 118: + dict(name='Face_54', id=118, color=[255, 255, 255], type='', swap=''), + 119: + dict( + name='Face_55', + id=119, + color=[255, 255, 255], + type='', + swap='Face_53'), + 120: + dict( + name='Face_56', + id=120, + color=[255, 255, 255], + type='', + swap='Face_72'), + 121: + dict( + name='Face_57', + id=121, + color=[255, 255, 255], + type='', + swap='Face_71'), + 122: + dict( + name='Face_58', + id=122, + color=[255, 255, 255], + type='', + swap='Face_70'), + 123: + dict( + name='Face_59', + id=123, + color=[255, 255, 255], + type='', + swap='Face_69'), + 124: + dict( + name='Face_60', + id=124, + color=[255, 255, 255], + type='', + swap='Face_68'), + 125: + dict( + name='Face_61', + id=125, + color=[255, 255, 255], + type='', + swap='Face_67'), + 126: + dict( + name='Face_62', + id=126, + color=[255, 255, 255], + type='', + swap='Face_66'), + 127: + dict( + name='Face_63', + id=127, + color=[255, 255, 255], + type='', + swap='Face_65'), + 128: + dict(name='Face_64', id=128, color=[255, 255, 255], type='', swap=''), + 129: + dict( + name='Face_65', + id=129, + color=[255, 255, 255], + type='', + swap='Face_63'), + 130: + dict( + name='Face_66', + id=130, + color=[255, 255, 255], + type='', + swap='Face_62'), + 131: + dict( + name='Face_67', + id=131, + color=[255, 255, 255], + type='', + swap='Face_61'), + 132: + dict( + name='Face_68', + id=132, + color=[255, 255, 255], + type='', + swap='Face_60'), + 133: + dict( + name='Face_69', + id=133, + color=[255, 255, 255], + type='', + swap='Face_59'), + 134: + dict( + name='Face_70', + id=134, + color=[255, 255, 255], + type='', + swap='Face_58'), + 135: + dict( + name='Face_71', + id=135, + color=[255, 255, 255], + type='', + swap='Face_57'), + 136: + dict( + name='Face_72', + id=136, + color=[255, 255, 255], + type='', + swap='Face_56'), + }, + skeleton_info={ + 0: dict(link=('L_Ankle', 'L_Knee'), id=0, color=[0, 255, 0]), + 1: dict(link=('L_Knee', 'L_Hip'), id=1, color=[0, 255, 0]), + 2: dict(link=('R_Ankle', 'R_Knee'), id=2, color=[0, 255, 0]), + 3: dict(link=('R_Knee', 'R_Hip'), id=3, color=[0, 255, 0]), + 4: dict(link=('L_Hip', 'R_Hip'), id=4, color=[0, 255, 0]), + 5: dict(link=('L_Shoulder', 'L_Hip'), id=5, color=[0, 255, 0]), + 6: dict(link=('R_Shoulder', 'R_Hip'), id=6, color=[0, 255, 0]), + 7: dict(link=('L_Shoulder', 'R_Shoulder'), id=7, color=[0, 255, 0]), + 8: dict(link=('L_Shoulder', 'L_Elbow'), id=8, color=[0, 255, 0]), + 9: dict(link=('R_Shoulder', 'R_Elbow'), id=9, color=[0, 255, 0]), + 10: dict(link=('L_Elbow', 'L_Wrist'), id=10, color=[0, 255, 0]), + 11: dict(link=('R_Elbow', 'R_Wrist'), id=11, color=[255, 128, 0]), + 12: dict(link=('L_Eye', 'R_Eye'), id=12, color=[255, 128, 0]), + 13: dict(link=('Nose', 'L_Eye'), id=13, color=[255, 128, 0]), + 14: dict(link=('Nose', 'R_Eye'), id=14, color=[255, 128, 0]), + 15: dict(link=('L_Eye', 'L_Ear'), id=15, color=[255, 128, 0]), + 16: dict(link=('R_Eye', 'R_Ear'), id=16, color=[255, 128, 0]), + 17: dict(link=('L_Ear', 'L_Shoulder'), id=17, color=[255, 128, 0]), + 18: dict(link=('R_Ear', 'R_Shoulder'), id=18, color=[255, 128, 0]), + 19: dict(link=('L_Ankle', 'L_Big_toe'), id=19, color=[255, 128, 0]), + 20: dict(link=('L_Ankle', 'L_Small_toe'), id=20, color=[255, 128, 0]), + 21: dict(link=('L_Ankle', 'L_Heel'), id=21, color=[255, 128, 0]), + 22: dict(link=('R_Ankle', 'R_Big_toe'), id=22, color=[255, 128, 0]), + 23: dict(link=('R_Ankle', 'R_Small_toe'), id=23, color=[255, 128, 0]), + 24: dict(link=('R_Ankle', 'R_Heel'), id=24, color=[255, 128, 0]), + 25: dict(link=('L_Wrist', 'L_Thumb_1'), id=25, color=[255, 128, 0]), + 26: dict(link=('L_Thumb_1', 'L_Thumb_2'), id=26, color=[255, 128, 0]), + 27: dict(link=('L_Thumb_2', 'L_Thumb_3'), id=27, color=[255, 128, 0]), + 28: dict(link=('L_Thumb_3', 'L_Thumb_4'), id=28, color=[255, 128, 0]), + 29: dict(link=('L_Wrist', 'L_Index_1'), id=29, color=[255, 128, 0]), + 30: dict(link=('L_Index_1', 'L_Index_2'), id=30, color=[255, 128, 0]), + 31: + dict(link=('L_Index_2', 'L_Index_3'), id=31, color=[255, 255, 255]), + 32: + dict(link=('L_Index_3', 'L_Index_4'), id=32, color=[255, 255, 255]), + 33: dict(link=('L_Wrist', 'L_Middle_1'), id=33, color=[255, 255, 255]), + 34: + dict(link=('L_Middle_1', 'L_Middle_2'), id=34, color=[255, 255, 255]), + 35: + dict(link=('L_Middle_2', 'L_Middle_3'), id=35, color=[255, 255, 255]), + 36: + dict(link=('L_Middle_3', 'L_Middle_4'), id=36, color=[255, 255, 255]), + 37: dict(link=('L_Wrist', 'L_Ring_1'), id=37, color=[255, 255, 255]), + 38: dict(link=('L_Ring_1', 'L_Ring_2'), id=38, color=[255, 255, 255]), + 39: dict(link=('L_Ring_2', 'L_Ring_3'), id=39, color=[255, 255, 255]), + 40: dict(link=('L_Ring_3', 'L_Ring_4'), id=40, color=[255, 255, 255]), + 41: dict(link=('L_Wrist', 'L_Pinky_1'), id=41, color=[255, 255, 255]), + 42: + dict(link=('L_Pinky_1', 'L_Pinky_2'), id=42, color=[255, 255, 255]), + 43: + dict(link=('L_Pinky_2', 'L_Pinky_3'), id=43, color=[255, 255, 255]), + 44: + dict(link=('L_Pinky_3', 'L_Pinky_4'), id=44, color=[255, 255, 255]), + 45: dict(link=('R_Wrist', 'R_Thumb_1'), id=45, color=[255, 255, 255]), + 46: + dict(link=('R_Thumb_1', 'R_Thumb_2'), id=46, color=[255, 255, 255]), + 47: + dict(link=('R_Thumb_2', 'R_Thumb_3'), id=47, color=[255, 255, 255]), + 48: + dict(link=('R_Thumb_3', 'R_Thumb_4'), id=48, color=[255, 255, 255]), + 49: dict(link=('R_Wrist', 'R_Index_1'), id=49, color=[255, 255, 255]), + 50: + dict(link=('R_Index_1', 'R_Index_2'), id=50, color=[255, 255, 255]), + 51: + dict(link=('R_Index_2', 'R_Index_3'), id=51, color=[255, 255, 255]), + 52: + dict(link=('R_Index_3', 'R_Index_4'), id=52, color=[255, 255, 255]), + 53: dict(link=('R_Wrist', 'R_Middle_1'), id=53, color=[255, 255, 255]), + 54: + dict(link=('R_Middle_1', 'R_Middle_2'), id=54, color=[255, 255, 255]), + 55: + dict(link=('R_Middle_2', 'R_Middle_3'), id=55, color=[255, 255, 255]), + 56: + dict(link=('R_Middle_3', 'R_Middle_4'), id=56, color=[255, 255, 255]), + 57: dict(link=('R_Wrist', 'R_Pinky_1'), id=57, color=[255, 255, 255]), + 58: + dict(link=('R_Pinky_1', 'R_Pinky_2'), id=58, color=[255, 255, 255]), + 59: + dict(link=('R_Pinky_2', 'R_Pinky_3'), id=59, color=[255, 255, 255]), + 60: + dict(link=('R_Pinky_3', 'R_Pinky_4'), id=60, color=[255, 255, 255]), + }, + joint_weights=[1.] * 137, + sigmas=[]) diff --git a/docs/en/dataset_zoo/3d_body_keypoint.md b/docs/en/dataset_zoo/3d_body_keypoint.md index 82e21010fc..3a35e2443b 100644 --- a/docs/en/dataset_zoo/3d_body_keypoint.md +++ b/docs/en/dataset_zoo/3d_body_keypoint.md @@ -8,6 +8,7 @@ MMPose supported datasets: - [Human3.6M](#human36m) \[ [Homepage](http://vision.imar.ro/human3.6m/description.php) \] - [CMU Panoptic](#cmu-panoptic) \[ [Homepage](http://domedb.perception.cs.cmu.edu/) \] - [Campus/Shelf](#campus-and-shelf) \[ [Homepage](http://campar.in.tum.de/Chair/MultiHumanPose) \] +- [UBody](#ubody3d) \[ [Homepage](https://osx-ubody.github.io/) \] ## Human3.6M @@ -197,3 +198,100 @@ mmpose | ├── pred_shelf_maskrcnn_hrnet_coco.pkl | ├── actorsGT.mat ``` + +## UBody3d + +
+UBody (CVPR'2023) + +```bibtex +@article{lin2023one, + title={One-Stage 3D Whole-Body Mesh Recovery with Component Aware Transformer}, + author={Lin, Jing and Zeng, Ailing and Wang, Haoqian and Zhang, Lei and Li, Yu}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + year={2023}, +} +``` + +
+ +
+ +
+ +For [Ubody](https://github.com/IDEA-Research/OSX) dataset, videos and annotations can be downloaded from [OSX homepage](https://github.com/IDEA-Research/OSX). + +Download and extract them under $MMPOSE/data, and make them look like this: + +```text +mmpose +├── mmpose +├── docs +├── tests +├── tools +├── configs +`── data + │── UBody + ├── annotations + │   ├── ConductMusic + │   ├── Entertainment + │   ├── Fitness + │   ├── Interview + │   ├── LiveVlog + │   ├── Magic_show + │   ├── Movie + │   ├── Olympic + │   ├── Online_class + │   ├── SignLanguage + │   ├── Singing + │   ├── Speech + │   ├── TVShow + │   ├── TalkShow + │   └── VideoConference + ├── splits + │   ├── inter_scene_test_list.npy + │   └── intra_scene_test_list.npy + ├── videos + │   ├── ConductMusic + │   ├── Entertainment + │   ├── Fitness + │   ├── Interview + │   ├── LiveVlog + │   ├── Magic_show + │   ├── Movie + │   ├── Olympic + │   ├── Online_class + │   ├── SignLanguage + │   ├── Singing + │   ├── Speech + │   ├── TVShow + │   ├── TalkShow + │   └── VideoConference +``` + +Convert videos to images then split them into train/val set: + +```shell +python tools/dataset_converters/ubody_kpts_to_coco.py +``` + +Before generating 3D keypoints, you need to install SMPLX tools and download human models, please refer to [Github](https://github.com/vchoutas/smplx#installation) and [SMPLX](https://smpl-x.is.tue.mpg.de/download.php). + +```shell +pip install smplx +``` + +The directory tree of human models should be like this: + +```text +human_model_path +|── smplx + ├── SMPLX_NEUTRAL.npz + ├── SMPLX_NEUTRAL.pkl +``` + +After the above preparations are finished, execute the following script: + +```shell +python tools/dataset_converters/ubody_smplx_to_coco.py --data-root {$MMPOSE/data/UBody} --human-model-path {$MMPOSE/data/human_model_path/} +``` diff --git a/mmpose/datasets/datasets/base/base_mocap_dataset.py b/mmpose/datasets/datasets/base/base_mocap_dataset.py index 290edafed0..b06d934ac5 100644 --- a/mmpose/datasets/datasets/base/base_mocap_dataset.py +++ b/mmpose/datasets/datasets/base/base_mocap_dataset.py @@ -96,8 +96,7 @@ def __init__(self, assert exists(_ann_file), ( f'Annotation file `{_ann_file}` does not exist.') - with get_local_path(_ann_file) as local_path: - self.ann_data = np.load(local_path) + self._load_ann_file(_ann_file) self.camera_param_file = camera_param_file if self.camera_param_file: @@ -137,6 +136,19 @@ def __init__(self, lazy_init=lazy_init, max_refetch=max_refetch) + def _load_ann_file(self, ann_file: str) -> dict: + """Load annotation file to get image information. + + Args: + ann_file (str): Annotation file path. + + Returns: + dict: Annotation information. + """ + + with get_local_path(ann_file) as local_path: + self.ann_data = np.load(local_path) + @classmethod def _load_metainfo(cls, metainfo: dict = None) -> dict: """Collect meta information from the dictionary of meta. diff --git a/mmpose/datasets/datasets/body3d/__init__.py b/mmpose/datasets/datasets/body3d/__init__.py index d5afeca578..2b52caeadd 100644 --- a/mmpose/datasets/datasets/body3d/__init__.py +++ b/mmpose/datasets/datasets/body3d/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. from .h36m_dataset import Human36mDataset +from .ubody3d_dataset import UBody3dDataset -__all__ = ['Human36mDataset'] +__all__ = ['Human36mDataset', 'UBody3dDataset'] diff --git a/mmpose/datasets/datasets/body3d/ubody3d_dataset.py b/mmpose/datasets/datasets/body3d/ubody3d_dataset.py new file mode 100644 index 0000000000..85b8d893e7 --- /dev/null +++ b/mmpose/datasets/datasets/body3d/ubody3d_dataset.py @@ -0,0 +1,247 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import defaultdict +from typing import List, Tuple + +import numpy as np +from mmengine.fileio import get_local_path +from xtcocotools.coco import COCO + +from mmpose.datasets.datasets import BaseMocapDataset +from mmpose.registry import DATASETS + + +@DATASETS.register_module() +class UBody3dDataset(BaseMocapDataset): + """Ubody3d dataset for 3D human pose estimation. + + "One-Stage 3D Whole-Body Mesh Recovery with Component Aware Transformer", + CVPR'2023. More details can be found in the `paper + `__ . + + Ubody3D keypoints:: + + 0-24: 25 body keypoints, + 25-64: 40 hand keypoints, + 65-136: 72 face keypoints, + + In total, we have 137 keypoints for wholebody 3D pose estimation. + + Args: + ann_file (str): Annotation file path. Default: ''. + seq_len (int): Number of frames in a sequence. Default: 1. + multiple_target (int): If larger than 0, merge every + ``multiple_target`` sequence together. Default: 0. + causal (bool): If set to ``True``, the rightmost input frame will be + the target frame. Otherwise, the middle input frame will be the + target frame. Default: ``True``. + subset_frac (float): The fraction to reduce dataset size. If set to 1, + the dataset size is not reduced. Default: 1. + camera_param_file (str): Cameras' parameters file. Default: ``None``. + data_mode (str): Specifies the mode of data samples: ``'topdown'`` or + ``'bottomup'``. In ``'topdown'`` mode, each data sample contains + one instance; while in ``'bottomup'`` mode, each data sample + contains all instances in a image. Default: ``'topdown'`` + metainfo (dict, optional): Meta information for dataset, such as class + information. Default: ``None``. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Default: ``None``. + data_prefix (dict, optional): Prefix for training data. + Default: ``dict(img='')``. + filter_cfg (dict, optional): Config for filter data. Default: `None`. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Default: ``None`` which means using all ``data_infos``. + serialize_data (bool, optional): Whether to hold memory using + serialized objects, when enabled, data loader workers can use + shared RAM from master process instead of making a copy. + Default: ``True``. + pipeline (list, optional): Processing pipeline. Default: []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Default: ``False``. + lazy_init (bool, optional): Whether to load annotation during + instantiation. In some cases, such as visualization, only the meta + information of the dataset is needed, which is not necessary to + load annotation file. ``Basedataset`` can skip load annotations to + save time by set ``lazy_init=False``. Default: ``False``. + max_refetch (int, optional): If ``Basedataset.prepare_data`` get a + None img. The maximum extra number of cycles to get a valid + image. Default: 1000. + """ + + def __init__(self, + multiple_target: int = 0, + multiple_target_step: int = 0, + seq_step: int = 1, + pad_video_seq: bool = False, + **kwargs): + self.seq_step = seq_step + self.pad_video_seq = pad_video_seq + + if multiple_target > 0 and multiple_target_step == 0: + multiple_target_step = multiple_target + self.multiple_target_step = multiple_target_step + + super().__init__(multiple_target=multiple_target, **kwargs) + + METAINFO: dict = dict(from_file='configs/_base_/datasets/ubody3d.py') + + def _load_ann_file(self, ann_file: str) -> dict: + """Load annotation file.""" + with get_local_path(ann_file) as local_path: + self.ann_data = COCO(local_path) + + def get_sequence_indices(self) -> List[List[int]]: + video_frames = defaultdict(list) + img_ids = self.ann_data.getImgIds() + for img_id in img_ids: + img_info = self.ann_data.loadImgs(img_id)[0] + subj, _, _ = self._parse_image_name(img_info['file_name']) + video_frames[subj].append(img_id) + + sequence_indices = [] + _len = (self.seq_len - 1) * self.seq_step + 1 + _step = self.seq_step + + if self.multiple_target: + for _, _img_ids in sorted(video_frames.items()): + n_frame = len(_img_ids) + _ann_ids = self.ann_data.getAnnIds(imgIds=_img_ids) + seqs_from_video = [ + _ann_ids[i:(i + self.multiple_target):_step] + for i in range(0, n_frame, self.multiple_target_step) + ][:(n_frame + self.multiple_target_step - + self.multiple_target) // self.multiple_target_step] + sequence_indices.extend(seqs_from_video) + else: + for _, _img_ids in sorted(video_frames.items()): + n_frame = len(_img_ids) + _ann_ids = self.ann_data.getAnnIds(imgIds=_img_ids) + if self.pad_video_seq: + # Pad the sequence so that every frame in the sequence will + # be predicted. + if self.causal: + frames_left = self.seq_len - 1 + frames_right = 0 + else: + frames_left = (self.seq_len - 1) // 2 + frames_right = frames_left + for i in range(n_frame): + pad_left = max(0, frames_left - i // _step) + pad_right = max( + 0, frames_right - (n_frame - 1 - i) // _step) + start = max(i % _step, i - frames_left * _step) + end = min(n_frame - (n_frame - 1 - i) % _step, + i + frames_right * _step + 1) + sequence_indices.append([_ann_ids[0]] * pad_left + + _ann_ids[start:end:_step] + + [_ann_ids[-1]] * pad_right) + else: + seqs_from_video = [ + _ann_ids[i:(i + _len):_step] + for i in range(0, n_frame - _len + 1, _step) + ] + sequence_indices.extend(seqs_from_video) + + # reduce dataset size if needed + subset_size = int(len(sequence_indices) * self.subset_frac) + start = np.random.randint(0, len(sequence_indices) - subset_size + 1) + end = start + subset_size + + sequence_indices = sequence_indices[start:end] + + return sequence_indices + + def _parse_image_name(self, image_path: str) -> Tuple[str, int]: + """Parse image name to get video name and frame index. + + Args: + image_name (str): Image name. + + Returns: + tuple[str, int]: Video name and frame index. + """ + trim, file_name = image_path.split('/')[-2:] + frame_id, suffix = file_name.split('.') + return trim, frame_id, suffix + + def _load_annotations(self): + """Load data from annotations in COCO format.""" + num_keypoints = self.metainfo['num_keypoints'] + self._metainfo['CLASSES'] = self.ann_data.loadCats( + self.ann_data.getCatIds()) + + instance_list = [] + image_list = [] + + for i, _ann_ids in enumerate(self.sequence_indices): + expected_num_frames = self.seq_len + if self.multiple_target: + expected_num_frames = self.multiple_target + + assert len(_ann_ids) == (expected_num_frames), ( + f'Expected `frame_ids` == {expected_num_frames}, but ' + f'got {len(_ann_ids)} ') + + anns = self.ann_data.loadAnns(_ann_ids) + img_ids = [] + kpts = np.zeros((len(anns), num_keypoints, 2), dtype=np.float32) + kpts_3d = np.zeros((len(anns), num_keypoints, 3), dtype=np.float32) + keypoints_visible = np.zeros((len(anns), num_keypoints, 1), + dtype=np.float32) + for j, ann in enumerate(anns): + img_ids.append(ann['image_id']) + kpts[j] = np.array(ann['keypoints'], dtype=np.float32) + kpts_3d[j] = np.array(ann['keypoints_3d'], dtype=np.float32) + keypoints_visible[j] = np.array( + ann['keypoints_valid'], dtype=np.float32) + imgs = self.ann_data.loadImgs(img_ids) + keypoints_visible = keypoints_visible.squeeze(-1) + + scales = np.zeros(len(imgs), dtype=np.float32) + centers = np.zeros((len(imgs), 2), dtype=np.float32) + img_paths = np.array([img['file_name'] for img in imgs]) + factors = np.zeros((kpts_3d.shape[0], ), dtype=np.float32) + + target_idx = [-1] if self.causal else [int(self.seq_len // 2)] + if self.multiple_target: + target_idx = list(range(self.multiple_target)) + + cam_param = anns[-1]['camera_param'] + if 'w' not in cam_param or 'h' not in cam_param: + cam_param['w'] = 1000 + cam_param['h'] = 1000 + + instance_info = { + 'num_keypoints': num_keypoints, + 'keypoints': kpts, + 'keypoints_3d': kpts_3d, + 'keypoints_visible': keypoints_visible, + 'scale': scales, + 'center': centers, + 'id': i, + 'category_id': 1, + 'iscrowd': 0, + 'img_paths': list(img_paths), + 'img_ids': [img['id'] for img in imgs], + 'lifting_target': kpts_3d[target_idx], + 'lifting_target_visible': keypoints_visible[target_idx], + 'target_img_paths': img_paths[target_idx], + 'camera_param': cam_param, + 'factor': factors, + 'target_idx': target_idx, + } + + instance_list.append(instance_info) + + for img_id in self.ann_data.getImgIds(): + img = self.ann_data.loadImgs(img_id)[0] + img.update({ + 'img_id': + img_id, + 'img_path': + osp.join(self.data_prefix['img'], img['file_name']), + }) + image_list.append(img) + + return instance_list, image_list diff --git a/mmpose/datasets/transforms/converting.py b/mmpose/datasets/transforms/converting.py index c8204ac7ef..1906f16972 100644 --- a/mmpose/datasets/transforms/converting.py +++ b/mmpose/datasets/transforms/converting.py @@ -91,8 +91,12 @@ def transform(self, results: dict) -> dict: num_instances = results['keypoints'].shape[0] # Initialize output arrays - keypoints = np.zeros((num_instances, self.num_keypoints, 2)) + keypoints = np.zeros((num_instances, self.num_keypoints, 3)) keypoints_visible = np.zeros((num_instances, self.num_keypoints)) + key = 'keypoints_3d' if 'keypoints_3d' in results else 'keypoints' + c = results[key].shape[-1] + + flip_indices = results.get('flip_indices', None) # Create a mask to weight visibility loss keypoints_visible_weights = keypoints_visible.copy() @@ -100,26 +104,38 @@ def transform(self, results: dict) -> dict: # Interpolate keypoints if pairs of source indexes provided if self.interpolation: - keypoints[:, self.target_index] = 0.5 * ( - results['keypoints'][:, self.source_index] + - results['keypoints'][:, self.source_index2]) - + keypoints[:, self.target_index, :c] = 0.5 * ( + results[key][:, self.source_index] + + results[key][:, self.source_index2]) keypoints_visible[:, self.target_index] = results[ - 'keypoints_visible'][:, self.source_index] * \ - results['keypoints_visible'][:, self.source_index2] - + 'keypoints_visible'][:, self.source_index] * results[ + 'keypoints_visible'][:, self.source_index2] + # Flip keypoints if flip_indices provided + if flip_indices is not None: + for i, (x1, x2) in enumerate( + zip(self.source_index, self.source_index2)): + idx = flip_indices[x1] if x1 == x2 else i + flip_indices[i] = idx if idx < self.num_keypoints else i + flip_indices = flip_indices[:len(self.source_index)] # Otherwise just copy from the source index else: keypoints[:, - self.target_index] = results['keypoints'][:, self. - source_index] + self.target_index, :c] = results[key][:, + self.source_index] keypoints_visible[:, self.target_index] = results[ 'keypoints_visible'][:, self.source_index] # Update the results dict - results['keypoints'] = keypoints + results['keypoints'] = keypoints[..., :2] results['keypoints_visible'] = np.stack( [keypoints_visible, keypoints_visible_weights], axis=2) + if 'keypoints_3d' in results: + results['keypoints_3d'] = keypoints + results['lifting_target'] = keypoints[results['target_idx']] + results['lifting_target_visible'] = keypoints_visible[ + results['target_idx']] + results['flip_indices'] = flip_indices + return results def transform_sigmas(self, sigmas: Union[List, np.ndarray]): diff --git a/mmpose/evaluation/metrics/__init__.py b/mmpose/evaluation/metrics/__init__.py index 7090f0226a..9e82356a49 100644 --- a/mmpose/evaluation/metrics/__init__.py +++ b/mmpose/evaluation/metrics/__init__.py @@ -7,9 +7,10 @@ from .keypoint_3d_metrics import MPJPE from .keypoint_partition_metric import KeypointPartitionMetric from .posetrack18_metric import PoseTrack18Metric +from .simple_keypoint_3d_metrics import SimpleMPJPE __all__ = [ 'CocoMetric', 'PCKAccuracy', 'MpiiPCKAccuracy', 'JhmdbPCKAccuracy', 'AUC', 'EPE', 'NME', 'PoseTrack18Metric', 'CocoWholeBodyMetric', - 'KeypointPartitionMetric', 'MPJPE', 'InterHandMetric' + 'KeypointPartitionMetric', 'MPJPE', 'InterHandMetric', 'SimpleMPJPE' ] diff --git a/mmpose/evaluation/metrics/simple_keypoint_3d_metrics.py b/mmpose/evaluation/metrics/simple_keypoint_3d_metrics.py new file mode 100644 index 0000000000..dc0065d5b9 --- /dev/null +++ b/mmpose/evaluation/metrics/simple_keypoint_3d_metrics.py @@ -0,0 +1,119 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Optional, Sequence + +import numpy as np +from mmengine.evaluator import BaseMetric +from mmengine.logging import MMLogger + +from mmpose.registry import METRICS +from ..functional import keypoint_mpjpe + + +@METRICS.register_module() +class SimpleMPJPE(BaseMetric): + """MPJPE evaluation metric. + + Calculate the mean per-joint position error (MPJPE) of keypoints. + + Note: + - length of dataset: N + - num_keypoints: K + - number of keypoint dimensions: D (typically D = 2) + + Args: + mode (str): Method to align the prediction with the + ground truth. Supported options are: + + - ``'mpjpe'``: no alignment will be applied + - ``'p-mpjpe'``: align in the least-square sense in scale + - ``'n-mpjpe'``: align in the least-square sense in + scale, rotation, and translation. + + collect_device (str): Device name used for collecting results from + different ranks during distributed training. Must be ``'cpu'`` or + ``'gpu'``. Default: ``'cpu'``. + prefix (str, optional): The prefix that will be added in the metric + names to disambiguate homonymous metrics of different evaluators. + If prefix is not provided in the argument, ``self.default_prefix`` + will be used instead. Default: ``None``. + skip_list (list, optional): The list of subject and action combinations + to be skipped. Default: []. + """ + + ALIGNMENT = {'mpjpe': 'none', 'p-mpjpe': 'procrustes', 'n-mpjpe': 'scale'} + + def __init__(self, + mode: str = 'mpjpe', + collect_device: str = 'cpu', + prefix: Optional[str] = None, + skip_list: List[str] = []) -> None: + super().__init__(collect_device=collect_device, prefix=prefix) + allowed_modes = self.ALIGNMENT.keys() + if mode not in allowed_modes: + raise KeyError("`mode` should be 'mpjpe', 'p-mpjpe', or " + f"'n-mpjpe', but got '{mode}'.") + + self.mode = mode + self.skip_list = skip_list + + def process(self, data_batch: Sequence[dict], + data_samples: Sequence[dict]) -> None: + """Process one batch of data samples and predictions. The processed + results should be stored in ``self.results``, which will be used to + compute the metrics when all batches have been processed. + + Args: + data_batch (Sequence[dict]): A batch of data + from the dataloader. + data_samples (Sequence[dict]): A batch of outputs from + the model. + """ + for data_sample in data_samples: + # predicted keypoints coordinates, [T, K, D] + pred_coords = data_sample['pred_instances']['keypoints'] + if pred_coords.ndim == 4: + pred_coords = np.squeeze(pred_coords, axis=0) + # ground truth data_info + gt = data_sample['gt_instances'] + # ground truth keypoints coordinates, [T, K, D] + gt_coords = gt['lifting_target'] + # ground truth keypoints_visible, [T, K, 1] + mask = gt['lifting_target_visible'].astype(bool).reshape( + gt_coords.shape[0], -1) + + result = { + 'pred_coords': pred_coords, + 'gt_coords': gt_coords, + 'mask': mask, + } + + self.results.append(result) + + def compute_metrics(self, results: list) -> Dict[str, float]: + """Compute the metrics from processed results. + + Args: + results (list): The processed results of each batch. + + Returns: + Dict[str, float]: The computed metrics. The keys are the names of + the metrics, and the values are the corresponding results. + """ + logger: MMLogger = MMLogger.get_current_instance() + + # pred_coords: [N, K, D] + pred_coords = np.concatenate( + [result['pred_coords'] for result in results]) + # gt_coords: [N, K, D] + gt_coords = np.concatenate([result['gt_coords'] for result in results]) + # mask: [N, K] + mask = np.concatenate([result['mask'] for result in results]) + + error_name = self.mode.upper() + + logger.info(f'Evaluating {self.mode.upper()}...') + return { + error_name: + keypoint_mpjpe(pred_coords, gt_coords, mask, + self.ALIGNMENT[self.mode]) + } diff --git a/tests/data/ubody3d/ubody3d_train.json b/tests/data/ubody3d/ubody3d_train.json new file mode 100644 index 0000000000..55a4ac5226 --- /dev/null +++ b/tests/data/ubody3d/ubody3d_train.json @@ -0,0 +1 @@ +{"images": [{"id": 15, "height": 720, "width": 1280, "file_name": "Magic_show/Magic_show_S1_Trim1/Magic_show_S1_Trim1/000016.png"}], "annotations": [{"id": 0, "image_id": 15, "bbox": [74.55498504638672, 8.571063995361328, 1062.4967727661133, 701.8491630554199], "segmentation": [[]], "area": 0, "iscrowd": 0, "category_id": 1, "score": 1, "person_id": 0, "hand_box1": [336.4236145019531, 321.40362548828125, 473.6637268066406, 452.62567138671875], "hand_box2": [699.218994140625, 50.335018157958984, 533.58251953125, 621.6577186584473], "keypoints": [[585.656005859375, 1398.5216064453125], [699.9061889648438, 1586.966064453125], [450.14288330078125, 1596.144775390625], [878.3228149414062, 2171.27783203125], [252.16543579101562, 2132.398681640625], [793.895263671875, 2988.90771484375], [232.56475830078125, 2939.503173828125], [588.2872314453125, 570.474365234375], [862.1456298828125, 514.33837890625], [373.89849853515625, 519.60888671875], [1073.739990234375, 765.0070190429688], [89.8785400390625, 775.919921875], [1000.2418212890625, 635.8955688476562], [189.44015502929688, 567.993408203125], [891.81298828125, 2948.2041015625], [1013.4824829101562, 3015.250732421875], [819.24658203125, 3122.821533203125], [172.14041137695312, 2868.272705078125], [31.46063232421875, 2937.01025390625], [244.37692260742188, 3111.135009765625], [760.2764282226562, 235.35623168945312], [469.04644775390625, 237.359130859375], [672.689453125, 216.68638610839844], [536.8645629882812, 215.08010864257812], [594.4747924804688, 302.86590576171875], [937.543212890625, 563.2012939453125], [877.2040405273438, 564.7064819335938], [826.8228759765625, 548.8115234375], [768.3922729492188, 532.2924194335938], [945.0330810546875, 433.25579833984375], [887.2977905273438, 411.39129638671875], [854.9716796875, 409.1885986328125], [812.5216064453125, 409.8503112792969], [993.1986083984375, 415.13519287109375], [983.431640625, 352.09503173828125], [976.8125610351562, 306.58990478515625], [967.6991577148438, 251.8966064453125], [1042.6788330078125, 439.2115783691406], [1061.695068359375, 382.62310791015625], [1078.3428955078125, 336.8554382324219], [1089.8707275390625, 288.113037109375], [1077.3145751953125, 467.8497009277344], [1113.5694580078125, 449.51904296875], [1147.91796875, 434.2681884765625], [1184.372314453125, 406.7205505371094], [262.0787048339844, 512.4108276367188], [314.8291320800781, 495.84429931640625], [355.2375183105469, 463.73870849609375], [400.5841064453125, 429.6348876953125], [290.11627197265625, 385.6371765136719], [334.016357421875, 356.7796325683594], [352.326904296875, 347.6751403808594], [379.92449951171875, 336.6559143066406], [248.99337768554688, 355.2509460449219], [270.441162109375, 294.56085205078125], [283.58990478515625, 247.07943725585938], [298.6072692871094, 191.95077514648438], [194.588623046875, 364.1822509765625], [197.89288330078125, 304.9277038574219], [198.94699096679688, 255.0223846435547], [207.83172607421875, 206.8009490966797], [152.69793701171875, 380.91925048828125], [126.07894897460938, 349.861083984375], [99.02603149414062, 320.67138671875], [75.35498046875, 280.7127380371094], [605.5189819335938, 258.36474609375], [636.6569213867188, 261.03448486328125], [672.689453125, 216.68638610839844], [536.8645629882812, 215.08010864257812], [480.609130859375, 193.2221221923828], [498.7352294921875, 169.0961151123047], [527.0252075195312, 168.48736572265625], [556.564453125, 174.32501220703125], [582.2213134765625, 183.7449188232422], [619.771728515625, 185.09783935546875], [646.1015625, 177.27572631835938], [678.3016357421875, 172.73214721679688], [709.5665283203125, 174.52818298339844], [730.6221313476562, 199.52928161621094], [600.2632446289062, 215.79234313964844], [598.0828247070312, 240.45635986328125], [596.2218627929688, 264.4862976074219], [594.4674072265625, 287.62481689453125], [572.7188110351562, 305.8975830078125], [583.9725341796875, 311.3199157714844], [596.401123046875, 315.5985107421875], [609.6165771484375, 311.5094909667969], [622.2186279296875, 306.6711120605469], [512.6423950195312, 211.75982666015625], [528.5633544921875, 204.07089233398438], [548.4610595703125, 205.9830780029297], [565.9568481445312, 217.66900634765625], [548.8089599609375, 222.94613647460938], [530.2134399414062, 222.75762939453125], [639.6070556640625, 219.82444763183594], [655.8860473632812, 209.6044158935547], [676.3201904296875, 208.3985595703125], [694.9487915039062, 217.1615753173828], [674.3418579101562, 226.85595703125], [655.4156494140625, 225.6745147705078], [551.7490234375, 353.2354736328125], [564.1500244140625, 346.4883728027344], [583.2034912109375, 344.99609375], [595.4065551757812, 347.21868896484375], [607.8397216796875, 345.721435546875], [629.6182250976562, 348.2886047363281], [648.6402587890625, 353.0809631347656], [634.0433349609375, 361.12738037109375], [612.543212890625, 365.1044921875], [598.9017333984375, 366.5699768066406], [585.4385375976562, 366.0231018066406], [566.12353515625, 362.2437744140625], [553.4495239257812, 352.7164001464844], [583.9151000976562, 355.8670654296875], [596.3876342773438, 356.340576171875], [608.99560546875, 356.22100830078125], [648.081787109375, 352.85076904296875], [612.7412719726562, 351.5333251953125], [598.9871215820312, 351.8242492675781], [585.3312377929688, 352.4969482421875], [464.1539001464844, 202.29954528808594], [465.8164978027344, 244.8143768310547], [469.96026611328125, 282.73333740234375], [474.998779296875, 318.5062255859375], [485.900390625, 354.82257080078125], [503.9440002441406, 389.1557922363281], [533.9607543945312, 420.1808776855469], [569.1990356445312, 439.69488525390625], [604.7715454101562, 445.1242370605469], [641.609130859375, 438.5807189941406], [677.1731567382812, 419.1774597167969], [709.558349609375, 390.3476867675781], [728.9358520507812, 358.6229553222656], [743.6824951171875, 323.7010192871094], [752.355224609375, 286.009033203125], [756.031494140625, 248.0742645263672], [756.6275634765625, 206.8378448486328]], "foot_kpts": [1166.72314453125, 38.096336364746094, 0, 1002.4937744140625, 109.48077392578125, 0, 1049.140869140625, 663.1453857421875, 0, 317.3815002441406, 32.0361328125, 0, 402.523681640625, 303.2774963378906, 0, 177.21731567382812, 665.190673828125, 0], "face_kpts": [482.1813659667969, 206.51531982421875, 0, 474.4501037597656, 248.23251342773438, 1, 482.5657043457031, 282.5651550292969, 1, 490.3671569824219, 326.8166198730469, 1, 498.9546813964844, 355.2204895019531, 1, 519.25634765625, 390.5085754394531, 1, 543.9222412109375, 417.4048156738281, 1, 574.4150390625, 437.6228332519531, 1, 614.6944580078125, 442.5209045410156, 1, 648.99267578125, 436.2539978027344, 1, 682.6341552734375, 416.4512023925781, 1, 702.5023193359375, 392.0824279785156, 1, 725.9093017578125, 358.3260803222656, 1, 739.4346923828125, 328.9374084472656, 1, 746.7598876953125, 285.0207824707031, 1, 748.8603515625, 251.59585571289062, 1, 755.915771484375, 212.4534149169922, 0, 496.4743957519531, 188.47494506835938, 1, 514.8231201171875, 177.99856567382812, 1, 535.214111328125, 176.0469970703125, 1, 556.4619140625, 177.9375, 1, 576.8843994140625, 183.35317993164062, 1, 631.4595947265625, 183.65673828125, 1, 652.4815673828125, 180.27340698242188, 1, 676.221923828125, 180.07711791992188, 1, 698.4794921875, 184.41073608398438, 1, 718.5443115234375, 196.21084594726562, 1, 604.396484375, 218.71194458007812, 1, 602.6702880859375, 245.68115234375, 1, 600.9422607421875, 271.4402770996094, 1, 599.4947509765625, 297.5359802246094, 1, 571.33203125, 313.3100891113281, 1, 586.1724853515625, 317.1542663574219, 1, 601.4893798828125, 320.0868835449219, 1, 617.738525390625, 316.9916687011719, 1, 632.822509765625, 313.9440002441406, 1, 524.906005859375, 216.0177001953125, 1, 542.880859375, 206.15841674804688, 1, 563.9365234375, 208.03213500976562, 1, 578.5321044921875, 222.44454956054688, 1, 559.7491455078125, 226.11843872070312, 1, 541.22607421875, 225.11203002929688, 1, 636.491943359375, 223.62353515625, 1, 652.7271728515625, 210.68789672851562, 1, 674.761474609375, 209.86370849609375, 1, 692.972900390625, 221.53323364257812, 1, 674.9864501953125, 228.75543212890625, 1, 656.0750732421875, 229.04306030273438, 1, 560.0743408203125, 351.4398498535156, 1, 577.081787109375, 347.0306091308594, 1, 594.04638671875, 345.2702941894531, 1, 604.1793212890625, 346.1555480957031, 1, 614.151611328125, 344.8525695800781, 1, 634.447509765625, 345.7118225097656, 1, 656.1597900390625, 347.9260559082031, 1, 640.6773681640625, 358.7562561035156, 1, 624.00732421875, 366.7438049316406, 1, 605.445556640625, 369.8896789550781, 1, 588.646484375, 367.5843811035156, 1, 573.5023193359375, 360.9281921386719, 1, 565.385498046875, 352.2278137207031, 1, 585.1085205078125, 353.1212463378906, 1, 604.616943359375, 355.0426330566406, 1, 626.8272705078125, 351.8833312988281, 1, 650.2919921875, 349.2644958496094, 1, 627.5924072265625, 353.0104675292969, 1, 604.7803955078125, 355.8074645996094, 1, 584.6986083984375, 354.2829284667969, 1], "lefthand_kpts": [942.7679443359375, 607.469482421875, 1, 888.291259765625, 539.277587890625, 1, 832.873291015625, 483.5708923339844, 1, 787.126953125, 436.6972351074219, 1, 710.735107421875, 413.7229309082031, 1, 888.9903564453125, 319.5710754394531, 1, 868.0140380859375, 280.7148742675781, 1, 830.3096923828125, 266.0387268066406, 1, 778.9337158203125, 271.2351379394531, 1, 962.7294921875, 272.7072448730469, 1, 955.781005859375, 187.65567016601562, 1, 953.9222412109375, 103.62838745117188, 1, 959.151611328125, 29.267608642578125, 1, 1047.009033203125, 294.3193664550781, 1, 1056.5989990234375, 215.84146118164062, 1, 1066.36865234375, 147.68014526367188, 1, 1081.0699462890625, 65.11972045898438, 1, 1107.0172119140625, 358.7002258300781, 1, 1159.4434814453125, 319.2156677246094, 1, 1206.9718017578125, 272.8797912597656, 1, 1261.1082763671875, 224.43637084960938, 1], "righthand_kpts": [233.142822265625, 582.3209228515625, 1, 300.6414794921875, 508.47479248046875, 1, 362.43896484375, 455.85186767578125, 1, 377.3603515625, 404.19744873046875, 1, 446.76416015625, 377.29241943359375, 1, 342.8802490234375, 310.6497802734375, 1, 368.6904296875, 284.673095703125, 1, 381.802734375, 251.73486328125, 1, 421.5467529296875, 225.363525390625, 1, 283.64288330078125, 254.122802734375, 1, 304.9996337890625, 170.8004150390625, 1, 320.6651611328125, 98.6851806640625, 1, 335.6553955078125, 28.2318115234375, 1, 199.05755615234375, 256.80859375, 1, 206.0360107421875, 177.01025390625, 1, 215.68804931640625, 106.7457275390625, 1, 224.53521728515625, 32.276611328125, 1, 128.827392578125, 294.99359130859375, 1, 99.0606689453125, 239.12982177734375, 1, 65.53125, 189.2431640625, 1, 37.63360595703125, 116.657958984375, 1], "center": [605.8033447265625, 359.4956359863281], "scale": [6.6406049728393555, 8.854140281677246], "keypoints_score": [0.9791078567504883, 0.9932481050491333, 1.0011144876480103, 0.973096489906311, 0.972457766532898, 0.866172194480896, 0.8760361671447754, 0.3526427149772644, 0.3903506398200989, 0.921836793422699, 0.9433825016021729, 0.20496317744255066, 0.2460474669933319, 0.20729553699493408, 0.17142903804779053, 0.18208564817905426, 0.22269707918167114], "face_kpts_score": [0.3680439293384552, 0.5355573892593384, 0.6418813467025757, 0.6644495725631714, 0.7590401768684387, 0.5538617372512817, 0.5907169580459595, 0.5878690481185913, 0.6348617076873779, 0.7361799478530884, 0.6556291580200195, 0.618322491645813, 0.6537319421768188, 0.5892513394355774, 0.7059171199798584, 0.645734429359436, 0.4574907422065735, 0.9639992713928223, 0.9263820648193359, 0.8876979351043701, 0.9284569621086121, 0.9739065170288086, 0.9502178430557251, 0.9174821376800537, 0.918608546257019, 0.9061530232429504, 0.862210750579834, 0.9776759147644043, 0.973875105381012, 0.974762499332428, 0.9565852880477905, 0.9716235399246216, 1.0059518814086914, 0.946382999420166, 0.9594531059265137, 0.9658107757568359, 1.0158061981201172, 0.9708306789398193, 0.9969902634620667, 0.9845597743988037, 0.9349627494812012, 0.9380444288253784, 0.9717998504638672, 0.9871775507926941, 0.9774664640426636, 0.9537898898124695, 0.9465979933738708, 0.9661000967025757, 0.9713011980056763, 0.9717509746551514, 0.956028938293457, 1.000832438468933, 0.9808722734451294, 0.9960898160934448, 0.9364079236984253, 1.0011546611785889, 0.9167187213897705, 0.9541155099868774, 0.9244742393493652, 0.988551139831543, 0.9954862594604492, 0.9832127094268799, 0.978826642036438, 0.9751479625701904, 0.956895112991333, 0.9974040985107422, 0.9864891767501831, 0.9898920655250549], "foot_kpts_score": [0.24755269289016724, 0.1599443256855011, 0.25949808955192566, 0.2688680589199066, 0.14811083674430847, 0.23364056646823883], "lefthand_kpts_score": [0.603957986831665, 0.46176729202270506, 0.5001004695892334, 0.6286116600036621, 0.7983541250228882, 0.7467568874359131, 0.7094749569892883, 0.7889106035232544, 0.8908322811126709, 0.8638974189758301, 1.0441084861755372, 0.9282500505447387, 0.9102095127105713, 0.7738837957382202, 0.94963458776474, 0.8981462478637695, 0.9926700949668884, 0.7828058958053589, 0.9498528003692627, 0.9387582302093506, 0.8471795082092285], "righthand_kpts_score": [0.6722876787185669, 0.60037282705307, 0.5398626983165741, 0.7077780723571777, 0.7050052642822265, 0.6411999225616455, 0.725990629196167, 0.758279001712799, 0.8829087972640991, 0.889958119392395, 0.9569337129592895, 0.9145335912704468, 0.9213766813278198, 0.8925279140472412, 0.9955486416816711, 1.0033048152923585, 1.0014301896095277, 0.9033888339996338, 0.9002806305885315, 0.8902452945709228, 0.888652241230011], "face_box": [445.3220458984375, 145.05938720703125, 348.63178710937495, 332.0302734375], "face_valid": true, "leftfoot_valid": false, "rightfoot_valid": false, "lefthand_valid": true, "righthand_valid": true, "lefthand_box": [699.218994140625, 50.335018157958984, 533.58251953125, 621.6577186584473], "righthand_box": [81.47227172851564, -7.12115478515625, 398.4362548828125, 664.060546875], "lefthand_update": true, "righthand_update": true, "lefthand_kpts_vitposehand": [942.7679443359375, 607.469482421875, 1, 888.291259765625, 539.277587890625, 1, 832.873291015625, 483.5708923339844, 1, 787.126953125, 436.6972351074219, 1, 710.735107421875, 413.7229309082031, 1, 888.9903564453125, 319.5710754394531, 1, 868.0140380859375, 280.7148742675781, 1, 830.3096923828125, 266.0387268066406, 1, 778.9337158203125, 271.2351379394531, 1, 962.7294921875, 272.7072448730469, 1, 955.781005859375, 187.65567016601562, 1, 953.9222412109375, 103.62838745117188, 1, 959.151611328125, 29.267608642578125, 1, 1047.009033203125, 294.3193664550781, 1, 1056.5989990234375, 215.84146118164062, 1, 1066.36865234375, 147.68014526367188, 1, 1081.0699462890625, 65.11972045898438, 1, 1107.0172119140625, 358.7002258300781, 1, 1159.4434814453125, 319.2156677246094, 1, 1206.9718017578125, 272.8797912597656, 1, 1261.1082763671875, 224.43637084960938, 1], "righthand_kpts_vitposehand": [233.142822265625, 582.3209228515625, 1, 300.6414794921875, 508.47479248046875, 1, 362.43896484375, 455.85186767578125, 1, 377.3603515625, 404.19744873046875, 1, 446.76416015625, 377.29241943359375, 1, 342.8802490234375, 310.6497802734375, 1, 368.6904296875, 284.673095703125, 1, 381.802734375, 251.73486328125, 1, 421.5467529296875, 225.363525390625, 1, 283.64288330078125, 254.122802734375, 1, 304.9996337890625, 170.8004150390625, 1, 320.6651611328125, 98.6851806640625, 1, 335.6553955078125, 28.2318115234375, 1, 199.05755615234375, 256.80859375, 1, 206.0360107421875, 177.01025390625, 1, 215.68804931640625, 106.7457275390625, 1, 224.53521728515625, 32.276611328125, 1, 128.827392578125, 294.99359130859375, 1, 99.0606689453125, 239.12982177734375, 1, 65.53125, 189.2431640625, 1, 37.63360595703125, 116.657958984375, 1], "num_keypoints": 9, "full_body": false, "valid_label": 2, "keypoints_3d": [[585.656005859375, 1398.5216064453125, 8.0], [699.9061889648438, 1586.966064453125, 7.7132415771484375], [450.14288330078125, 1596.144775390625, 7.6570892333984375], [878.3228149414062, 2171.27783203125, 5.664215087890625], [252.16543579101562, 2132.398681640625, 5.6501007080078125], [793.895263671875, 2988.90771484375, 4.6084747314453125], [232.56475830078125, 2939.503173828125, 4.28839111328125], [588.2872314453125, 570.474365234375, 9.544265747070312], [862.1456298828125, 514.33837890625, 8.8726806640625], [373.89849853515625, 519.60888671875, 9.171127319335938], [1073.739990234375, 765.0070190429688, 7.1384735107421875], [89.8785400390625, 775.919921875, 7.5379791259765625], [1000.2418212890625, 635.8955688476562, 5.19927978515625], [189.44015502929688, 567.993408203125, 5.757049560546875], [891.81298828125, 2948.2041015625, 3.0384368896484375], [1013.4824829101562, 3015.250732421875, 3.43035888671875], [819.24658203125, 3122.821533203125, 4.943603515625], [172.14041137695312, 2868.272705078125, 2.809112548828125], [31.46063232421875, 2937.01025390625, 3.1867828369140625], [244.37692260742188, 3111.135009765625, 4.5428619384765625], [760.2764282226562, 235.35623168945312, 9.170547485351562], [469.04644775390625, 237.359130859375, 9.270904541015625], [672.689453125, 216.68638610839844, 8.436477661132812], [536.8645629882812, 215.08010864257812, 8.477508544921875], [594.4747924804688, 302.86590576171875, 8.231826782226562], [937.543212890625, 563.2012939453125, 7.81884765625], [877.2040405273438, 564.7064819335938, 7.746490478515625], [826.8228759765625, 548.8115234375, 7.6898651123046875], [768.3922729492188, 532.2924194335938, 7.540069580078125], [945.0330810546875, 433.25579833984375, 7.78143310546875], [887.2977905273438, 411.39129638671875, 7.68023681640625], [854.9716796875, 409.1885986328125, 7.548248291015625], [812.5216064453125, 409.8503112792969, 7.41748046875], [993.1986083984375, 415.13519287109375, 7.762298583984375], [983.431640625, 352.09503173828125, 7.7212677001953125], [976.8125610351562, 306.58990478515625, 7.644317626953125], [967.6991577148438, 251.8966064453125, 7.58074951171875], [1042.6788330078125, 439.2115783691406, 7.7346954345703125], [1061.695068359375, 382.62310791015625, 7.7144622802734375], [1078.3428955078125, 336.8554382324219, 7.6671142578125], [1089.8707275390625, 288.113037109375, 7.64324951171875], [1077.3145751953125, 467.8497009277344, 7.6988525390625], [1113.5694580078125, 449.51904296875, 7.6714019775390625], [1147.91796875, 434.2681884765625, 7.6133880615234375], [1184.372314453125, 406.7205505371094, 7.566802978515625], [262.0787048339844, 512.4108276367188, 7.7939453125], [314.8291320800781, 495.84429931640625, 7.6787109375], [355.2375183105469, 463.73870849609375, 7.6097564697265625], [400.5841064453125, 429.6348876953125, 7.4446563720703125], [290.11627197265625, 385.6371765136719, 7.82208251953125], [334.016357421875, 356.7796325683594, 7.663116455078125], [352.326904296875, 347.6751403808594, 7.499725341796875], [379.92449951171875, 336.6559143066406, 7.330535888671875], [248.99337768554688, 355.2509460449219, 7.84161376953125], [270.441162109375, 294.56085205078125, 7.848602294921875], [283.58990478515625, 247.07943725585938, 7.8173370361328125], [298.6072692871094, 191.95077514648438, 7.8151092529296875], [194.588623046875, 364.1822509765625, 7.8341217041015625], [197.89288330078125, 304.9277038574219, 7.8556976318359375], [198.94699096679688, 255.0223846435547, 7.8529815673828125], [207.83172607421875, 206.8009490966797, 7.8715667724609375], [152.69793701171875, 380.91925048828125, 7.8072052001953125], [126.07894897460938, 349.861083984375, 7.8142547607421875], [99.02603149414062, 320.67138671875, 7.79296875], [75.35498046875, 280.7127380371094, 7.79833984375], [605.5189819335938, 258.36474609375, 7.6539459228515625], [636.6569213867188, 261.03448486328125, 7.6003265380859375], [672.689453125, 216.68638610839844, 6.8922119140625], [536.8645629882812, 215.08010864257812, 6.9332427978515625], [480.609130859375, 193.2221221923828, 7.156890869140625], [498.7352294921875, 169.0961151123047, 7.0008087158203125], [527.0252075195312, 168.48736572265625, 6.879364013671875], [556.564453125, 174.32501220703125, 6.8116912841796875], [582.2213134765625, 183.7449188232422, 6.796417236328125], [619.771728515625, 185.09783935546875, 6.7884368896484375], [646.1015625, 177.27572631835938, 6.788299560546875], [678.3016357421875, 172.73214721679688, 6.8334197998046875], [709.5665283203125, 174.52818298339844, 6.94036865234375], [730.6221313476562, 199.52928161621094, 7.08001708984375], [600.2632446289062, 215.79234313964844, 6.797698974609375], [598.0828247070312, 240.45635986328125, 6.753753662109375], [596.2218627929688, 264.4862976074219, 6.70782470703125], [594.4674072265625, 287.62481689453125, 6.66571044921875], [572.7188110351562, 305.8975830078125, 6.8535308837890625], [583.9725341796875, 311.3199157714844, 6.8229217529296875], [596.401123046875, 315.5985107421875, 6.804962158203125], [609.6165771484375, 311.5094909667969, 6.8159027099609375], [622.2186279296875, 306.6711120605469, 6.8405303955078125], [512.6423950195312, 211.75982666015625, 7.02471923828125], [528.5633544921875, 204.07089233398438, 6.9400634765625], [548.4610595703125, 205.9830780029297, 6.92816162109375], [565.9568481445312, 217.66900634765625, 6.9529266357421875], [548.8089599609375, 222.94613647460938, 6.9491424560546875], [530.2134399414062, 222.75762939453125, 6.9624176025390625], [639.6070556640625, 219.82444763183594, 6.930755615234375], [655.8860473632812, 209.6044158935547, 6.8970184326171875], [676.3201904296875, 208.3985595703125, 6.8957061767578125], [694.9487915039062, 217.1615753173828, 6.9696502685546875], [674.3418579101562, 226.85595703125, 6.9189300537109375], [655.4156494140625, 225.6745147705078, 6.91705322265625], [551.7490234375, 353.2354736328125, 6.971923828125], [564.1500244140625, 346.4883728027344, 6.88177490234375], [583.2034912109375, 344.99609375, 6.8333587646484375], [595.4065551757812, 347.21868896484375, 6.8253173828125], [607.8397216796875, 345.721435546875, 6.82666015625], [629.6182250976562, 348.2886047363281, 6.8668060302734375], [648.6402587890625, 353.0809631347656, 6.940582275390625], [634.0433349609375, 361.12738037109375, 6.8939056396484375], [612.543212890625, 365.1044921875, 6.8557891845703125], [598.9017333984375, 366.5699768066406, 6.8533477783203125], [585.4385375976562, 366.0231018066406, 6.8624725341796875], [566.12353515625, 362.2437744140625, 6.9132232666015625], [553.4495239257812, 352.7164001464844, 6.97503662109375], [583.9151000976562, 355.8670654296875, 6.8811187744140625], [596.3876342773438, 356.340576171875, 6.8712615966796875], [608.99560546875, 356.22100830078125, 6.8746795654296875], [648.081787109375, 352.85076904296875, 6.94110107421875], [612.7412719726562, 351.5333251953125, 6.865570068359375], [598.9871215820312, 351.8242492675781, 6.8616485595703125], [585.3312377929688, 352.4969482421875, 6.87408447265625], [464.1539001464844, 202.29954528808594, 7.4058380126953125], [465.8164978027344, 244.8143768310547, 7.313018798828125], [469.96026611328125, 282.73333740234375, 7.331451416015625], [474.998779296875, 318.5062255859375, 7.377685546875], [485.900390625, 354.82257080078125, 7.34814453125], [503.9440002441406, 389.1557922363281, 7.29644775390625], [533.9607543945312, 420.1808776855469, 7.2111968994140625], [569.1990356445312, 439.69488525390625, 7.0761260986328125], [604.7715454101562, 445.1242370605469, 7.0256805419921875], [641.609130859375, 438.5807189941406, 7.05670166015625], [677.1731567382812, 419.1774597167969, 7.1628265380859375], [709.558349609375, 390.3476867675781, 7.262908935546875], [728.9358520507812, 358.6229553222656, 7.3195648193359375], [743.6824951171875, 323.7010192871094, 7.3823699951171875], [752.355224609375, 286.009033203125, 7.3757171630859375], [756.031494140625, 248.0742645263672, 7.3575439453125], [756.6275634765625, 206.8378448486328, 7.39019775390625]], "keypoints_valid": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "camera_param": {"focal": [34553.93155415853, 34553.93075942993], "princpt": [605.3033752441406, 358.99560546875]}}], "categories": [{"supercategory": "person", "id": 1, "name": "person"}]} \ No newline at end of file diff --git a/tests/test_datasets/test_datasets/test_body_datasets/test_ubody_dataset.py b/tests/test_datasets/test_datasets/test_body_datasets/test_ubody_dataset.py new file mode 100644 index 0000000000..12f780e1a0 --- /dev/null +++ b/tests/test_datasets/test_datasets/test_body_datasets/test_ubody_dataset.py @@ -0,0 +1,77 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from unittest import TestCase + +import numpy as np + +from mmpose.datasets.datasets.body3d import UBody3dDataset + + +class TestUBody3dDataset(TestCase): + + def build_ubody3d_dataset(self, **kwargs): + + cfg = dict( + ann_file='ubody3d_train.json', + data_mode='topdown', + data_root='tests/data/ubody3d', + pipeline=[], + test_mode=False) + + cfg.update(kwargs) + return UBody3dDataset(**cfg) + + def check_data_info_keys(self, data_info: dict): + expected_keys = dict( + img_paths=list, + keypoints=np.ndarray, + keypoints_3d=np.ndarray, + scale=np.ndarray, + center=np.ndarray, + id=int) + + for key, type_ in expected_keys.items(): + self.assertIn(key, data_info) + self.assertIsInstance(data_info[key], type_, key) + + def test_metainfo(self): + dataset = self.build_ubody3d_dataset() + # test dataset_name + self.assertEqual(dataset.metainfo['dataset_name'], 'ubody3d') + + # test number of keypoints + num_keypoints = 137 + self.assertEqual(dataset.metainfo['num_keypoints'], num_keypoints) + self.assertEqual( + len(dataset.metainfo['keypoint_colors']), num_keypoints) + self.assertEqual( + len(dataset.metainfo['dataset_keypoint_weights']), num_keypoints) + + # test some extra metainfo + self.assertEqual( + len(dataset.metainfo['skeleton_links']), + len(dataset.metainfo['skeleton_link_colors'])) + + def test_topdown(self): + # test topdown training + dataset = self.build_ubody3d_dataset(data_mode='topdown') + dataset.full_init() + self.assertEqual(len(dataset), 1) + self.check_data_info_keys(dataset[0]) + + # test topdown testing + dataset = self.build_ubody3d_dataset( + data_mode='topdown', test_mode=True) + dataset.full_init() + self.assertEqual(len(dataset), 1) + self.check_data_info_keys(dataset[0]) + + # test topdown training with sequence config + dataset = self.build_ubody3d_dataset( + data_mode='topdown', + seq_len=1, + seq_step=1, + causal=False, + pad_video_seq=True) + dataset.full_init() + self.assertEqual(len(dataset), 1) + self.check_data_info_keys(dataset[0]) diff --git a/tests/test_datasets/test_transforms/test_converting.py b/tests/test_datasets/test_transforms/test_converting.py index 5cce813b70..dc4376baf9 100644 --- a/tests/test_datasets/test_transforms/test_converting.py +++ b/tests/test_datasets/test_transforms/test_converting.py @@ -81,6 +81,33 @@ def test_transform(self): self.data_info['keypoints_visible'][:, source_index]).all()) + # check 3d keypoint + self.data_info['keypoints_3d'] = np.random.random((4, 17, 3)) + self.data_info['target_idx'] = [-1] + mapping = [(3, 0), (6, 1), (16, 2), (5, 3)] + transform = KeypointConverter(num_keypoints=5, mapping=mapping) + results = transform(self.data_info.copy()) + + # check shape + self.assertEqual(results['keypoints_3d'].shape[0], + self.data_info['keypoints_3d'].shape[0]) + self.assertEqual(results['keypoints_3d'].shape[1], 5) + self.assertEqual(results['keypoints_3d'].shape[2], 3) + self.assertEqual(results['keypoints_visible'].shape[0], + self.data_info['keypoints_visible'].shape[0]) + self.assertEqual(results['keypoints_visible'].shape[1], 5) + + # check value + for source_index, target_index in mapping: + self.assertTrue( + (results['keypoints_3d'][:, target_index] == + self.data_info['keypoints_3d'][:, source_index]).all()) + self.assertEqual(results['keypoints_visible'].ndim, 3) + self.assertEqual(results['keypoints_visible'].shape[2], 2) + self.assertTrue( + (results['keypoints_visible'][:, target_index, 0] == + self.data_info['keypoints_visible'][:, source_index]).all()) + def test_transform_sigmas(self): mapping = [(3, 0), (6, 1), (16, 2), (5, 3)] diff --git a/tools/dataset_converters/ubody_smplx_to_coco.py b/tools/dataset_converters/ubody_smplx_to_coco.py new file mode 100644 index 0000000000..16f827fce1 --- /dev/null +++ b/tools/dataset_converters/ubody_smplx_to_coco.py @@ -0,0 +1,430 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import json +import os +import os.path as osp +from functools import partial +from typing import Dict, List + +import mmengine +import numpy as np +import smplx +import torch +from pycocotools.coco import COCO + + +class SMPLX(object): + + def __init__(self, human_model_path): + self.human_model_path = human_model_path + self.layer_args = { + 'create_global_orient': False, + 'create_body_pose': False, + 'create_left_hand_pose': False, + 'create_right_hand_pose': False, + 'create_jaw_pose': False, + 'create_leye_pose': False, + 'create_reye_pose': False, + 'create_betas': False, + 'create_expression': False, + 'create_transl': False, + } + + self.neutral_model = smplx.create( + self.human_model_path, + 'smplx', + gender='NEUTRAL', + use_pca=False, + use_face_contour=True, + **self.layer_args) + if torch.cuda.is_available(): + self.neutral_model = self.neutral_model.to('cuda:0') + + self.vertex_num = 10475 + self.face = self.neutral_model.faces + self.shape_param_dim = 10 + self.expr_code_dim = 10 + # 22 (body joints) + 30 (hand joints) + 1 (face jaw joint) + self.orig_joint_num = 53 + + # yapf: disable + self.orig_joints_name = ( + # 22 body joints + 'Pelvis', 'L_Hip', 'R_Hip', 'Spine_1', 'L_Knee', 'R_Knee', + 'Spine2', 'L_Ankle', 'R_Ankle', 'Spine_3', 'L_Foot', 'R_Foot', + 'Neck', 'L_Collar', 'R_Collar', 'Head', 'L_Shoulder', + 'R_Shoulder', 'L_Elbow', 'R_Elbow', 'L_Wrist', 'R_Wrist', + # left hand joints + 'L_Index_1', 'L_Index_2', 'L_Index_3', 'L_Middle_1', 'L_Middle_2', + 'L_Middle_3', 'L_Pinky_1', 'L_Pinky_2', 'L_Pinky_3', 'L_Ring_1', + 'L_Ring_2', 'L_Ring_3', 'L_Thumb_1', 'L_Thumb_2', 'L_Thumb_3', + # right hand joints + 'R_Index_1', 'R_Index_2', 'R_Index_3', 'R_Middle_1', 'R_Middle_2', + 'R_Middle_3', 'R_Pinky_1', 'R_Pinky_2', 'R_Pinky_3', 'R_Ring_1', + 'R_Ring_2', 'R_Ring_3', 'R_Thumb_1', 'R_Thumb_2', 'R_Thumb_3', + # 1 face jaw joint + 'Jaw', + ) + self.orig_flip_pairs = ( + # body joints + (1, 2), (4, 5), (7, 8), (10, 11), (13, 14), (16, 17), (18, 19), + (20, 21), + # hand joints + (22, 37), (23, 38), (24, 39), (25, 40), (26, 41), (27, 42), + (28, 43), (29, 44), (30, 45), (31, 46), (32, 47), (33, 48), + (34, 49), (35, 50), (36, 51), + ) + # yapf: enable + self.orig_root_joint_idx = self.orig_joints_name.index('Pelvis') + self.orig_joint_part = { + 'body': + range( + self.orig_joints_name.index('Pelvis'), + self.orig_joints_name.index('R_Wrist') + 1), + 'lhand': + range( + self.orig_joints_name.index('L_Index_1'), + self.orig_joints_name.index('L_Thumb_3') + 1), + 'rhand': + range( + self.orig_joints_name.index('R_Index_1'), + self.orig_joints_name.index('R_Thumb_3') + 1), + 'face': + range( + self.orig_joints_name.index('Jaw'), + self.orig_joints_name.index('Jaw') + 1) + } + + # changed SMPLX joint set for the supervision + self.joint_num = ( + 137 # 25 (body joints) + 40 (hand joints) + 72 (face keypoints) + ) + # yapf: disable + self.joints_name = ( + # 25 body joints + 'Pelvis', 'L_Hip', 'R_Hip', 'L_Knee', 'R_Knee', 'L_Ankle', + 'R_Ankle', 'Neck', 'L_Shoulder', 'R_Shoulder', 'L_Elbow', + 'R_Elbow', 'L_Wrist', 'R_Wrist', 'L_Big_toe', 'L_Small_toe', + 'L_Heel', 'R_Big_toe', 'R_Small_toe', 'R_Heel', 'L_Ear', 'R_Ear', + 'L_Eye', 'R_Eye', 'Nose', + # left hand joints + 'L_Thumb_1', 'L_Thumb_2', 'L_Thumb_3', 'L_Thumb4', 'L_Index_1', + 'L_Index_2', 'L_Index_3', 'L_Index_4', 'L_Middle_1', 'L_Middle_2', + 'L_Middle_3', 'L_Middle_4', 'L_Ring_1', 'L_Ring_2', 'L_Ring_3', + 'L_Ring_4', 'L_Pinky_1', 'L_Pinky_2', 'L_Pinky_3', 'L_Pinky_4', + # right hand joints + 'R_Thumb_1', 'R_Thumb_2', 'R_Thumb_3', 'R_Thumb_4', 'R_Index_1', + 'R_Index_2', 'R_Index_3', 'R_Index_4', 'R_Middle_1', 'R_Middle_2', + 'R_Middle_3', 'R_Middle_4', 'R_Ring_1', 'R_Ring_2', 'R_Ring_3', + 'R_Ring_4', 'R_Pinky_1', 'R_Pinky_2', 'R_Pinky_3', 'R_Pinky_4', + # 72 face keypoints + *[ + f'Face_{i}' for i in range(1, 73) + ], + ) + + self.root_joint_idx = self.joints_name.index('Pelvis') + self.lwrist_idx = self.joints_name.index('L_Wrist') + self.rwrist_idx = self.joints_name.index('R_Wrist') + self.neck_idx = self.joints_name.index('Neck') + self.flip_pairs = ( + # body joints + (1, 2), (3, 4), (5, 6), (8, 9), (10, 11), (12, 13), (14, 17), + (15, 18), (16, 19), (20, 21), (22, 23), + # hand joints + (25, 45), (26, 46), (27, 47), (28, 48), (29, 49), (30, 50), + (31, 51), (32, 52), (33, 53), (34, 54), (35, 55), (36, 56), + (37, 57), (38, 58), (39, 59), (40, 60), (41, 61), (42, 62), + (43, 63), (44, 64), + # face eyebrow + (67, 68), (69, 78), (70, 77), (71, 76), (72, 75), (73, 74), + # face below nose + (83, 87), (84, 86), + # face eyes + (88, 97), (89, 96), (90, 95), (91, 94), (92, 99), (93, 98), + # face mouse + (100, 106), (101, 105), (102, 104), (107, 111), (108, 110), + # face lip + (112, 116), (113, 115), (117, 119), + # face contours + (120, 136), (121, 135), (122, 134), (123, 133), (124, 132), + (125, 131), (126, 130), (127, 129) + ) + self.joint_idx = ( + 0, 1, 2, 4, 5, 7, 8, 12, 16, 17, 18, 19, 20, 21, 60, 61, 62, 63, + 64, 65, 59, 58, 57, 56, 55, # body joints + 37, 38, 39, 66, 25, 26, 27, 67, 28, 29, 30, 68, 34, 35, 36, 69, 31, + 32, 33, 70, # left hand joints + 52, 53, 54, 71, 40, 41, 42, 72, 43, 44, 45, 73, 49, 50, 51, 74, 46, + 47, 48, 75, # right hand joints + 22, 15, # jaw, head + 57, 56, # eyeballs + 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, # eyebrow + 86, 87, 88, 89, # nose + 90, 91, 92, 93, 94, # below nose + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, # eyes + 107, # right mouth + 108, 109, 110, 111, 112, # upper mouth + 113, # left mouth + 114, 115, 116, 117, 118, # lower mouth + 119, # right lip + 120, 121, 122, # upper lip + 123, # left lip + 124, 125, 126, # lower lip + 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, # face contour + ) + # yapf: enable + + self.joint_part = { + 'body': + range( + self.joints_name.index('Pelvis'), + self.joints_name.index('Nose') + 1), + 'lhand': + range( + self.joints_name.index('L_Thumb_1'), + self.joints_name.index('L_Pinky_4') + 1), + 'rhand': + range( + self.joints_name.index('R_Thumb_1'), + self.joints_name.index('R_Pinky_4') + 1), + 'hand': + range( + self.joints_name.index('L_Thumb_1'), + self.joints_name.index('R_Pinky_4') + 1), + 'face': + range( + self.joints_name.index('Face_1'), + self.joints_name.index('Face_72') + 1) + } + + +def read_annotation_file(annotation_file: str) -> List[Dict]: + with open(annotation_file, 'r') as f: + annotations = json.load(f) + return annotations + + +def cam2pixel(cam_coord, f, c): + x = cam_coord[:, 0] / cam_coord[:, 2] * f[0] + c[0] + y = cam_coord[:, 1] / cam_coord[:, 2] * f[1] + c[1] + z = cam_coord[:, 2] + return np.stack((x, y, z), 1) + + +def process_scene_anno(scene: str, annotation_root: str, splits: np.array, + human_model_path: str): + annos = read_annotation_file( + osp.join(annotation_root, scene, 'smplx_annotation.json')) + keypoint_annos = COCO( + osp.join(annotation_root, scene, 'keypoint_annotation.json')) + human_model = SMPLX(human_model_path) + + train_annos = [] + val_annos = [] + train_imgs = [] + val_imgs = [] + + progress_bar = mmengine.ProgressBar(len(keypoint_annos.anns.keys())) + for aid in keypoint_annos.anns.keys(): + ann = keypoint_annos.anns[aid] + img = keypoint_annos.loadImgs(ann['image_id'])[0] + if img['file_name'].startswith('/'): + file_name = img['file_name'][1:] + else: + file_name = img['file_name'] + + video_name = file_name.split('/')[-2] + if 'Trim' in video_name: + video_name = video_name.split('_Trim')[0] + + img_path = os.path.join( + annotation_root.replace('annotations', 'images'), scene, file_name) + if not os.path.exists(img_path): + progress_bar.update() + continue + if str(aid) not in annos: + progress_bar.update() + continue + + smplx_param = annos[str(aid)] + human_model_param = smplx_param['smplx_param'] + cam_param = smplx_param['cam_param'] + if 'lhand_valid' not in human_model_param: + human_model_param['lhand_valid'] = ann['lefthand_valid'] + human_model_param['rhand_valid'] = ann['righthand_valid'] + human_model_param['face_valid'] = ann['face_valid'] + + rotation_valid = np.ones((human_model.orig_joint_num), + dtype=np.float32) + coord_valid = np.ones((human_model.joint_num), dtype=np.float32) + + root_pose = human_model_param['root_pose'] + body_pose = human_model_param['body_pose'] + shape = human_model_param['shape'] + trans = human_model_param['trans'] + + if 'lhand_pose' in human_model_param and human_model_param.get( + 'lhand_valid', False): + lhand_pose = human_model_param['lhand_pose'] + else: + lhand_pose = np.zeros( + (3 * len(human_model.orig_joint_part['lhand'])), + dtype=np.float32) + rotation_valid[human_model.orig_joint_part['lhand']] = 0 + coord_valid[human_model.orig_joint_part['lhand']] = 0 + + if 'rhand_pose' in human_model_param and human_model_param.get( + 'rhand_valid', False): + rhand_pose = human_model_param['rhand_pose'] + else: + rhand_pose = np.zeros( + (3 * len(human_model.orig_joint_part['rhand'])), + dtype=np.float32) + rotation_valid[human_model.orig_joint_part['rhand']] = 0 + coord_valid[human_model.orig_joint_part['rhand']] = 0 + + if 'jaw_pose' in human_model_param and \ + 'expr' in human_model_param and \ + human_model_param.get('face_valid', False): + jaw_pose = human_model_param['jaw_pose'] + expr = human_model_param['expr'] + else: + jaw_pose = np.zeros((3), dtype=np.float32) + expr = np.zeros((human_model.expr_code_dim), dtype=np.float32) + rotation_valid[human_model.orig_joint_part['face']] = 0 + coord_valid[human_model.orig_joint_part['face']] = 0 + + # init human model inputs + device = torch.device( + 'cuda:0') if torch.cuda.is_available() else torch.device('cpu') + root_pose = torch.FloatTensor(root_pose).to(device).view(1, 3) + body_pose = torch.FloatTensor(body_pose).to(device).view(-1, 3) + lhand_pose = torch.FloatTensor(lhand_pose).to(device).view(-1, 3) + rhand_pose = torch.FloatTensor(rhand_pose).to(device).view(-1, 3) + jaw_pose = torch.FloatTensor(jaw_pose).to(device).view(-1, 3) + shape = torch.FloatTensor(shape).to(device).view(1, -1) + expr = torch.FloatTensor(expr).to(device).view(1, -1) + trans = torch.FloatTensor(trans).to(device).view(1, -1) + zero_pose = torch.zeros((1, 3), dtype=torch.float32, device=device) + + with torch.no_grad(): + output = human_model.neutral_model( + betas=shape, + body_pose=body_pose.view(1, -1), + global_orient=root_pose, + transl=trans, + left_hand_pose=lhand_pose.view(1, -1), + right_hand_pose=rhand_pose.view(1, -1), + jaw_pose=jaw_pose.view(1, -1), + leye_pose=zero_pose, + reye_pose=zero_pose, + expression=expr) + + joint_cam = output.joints[0].cpu().numpy()[human_model.joint_idx, :] + joint_img = cam2pixel(joint_cam, cam_param['focal'], + cam_param['princpt']) + + joint_cam = (joint_cam - joint_cam[human_model.root_joint_idx, None, :] + ) # root-relative + joint_cam[human_model.joint_part['lhand'], :] = ( + joint_cam[human_model.joint_part['lhand'], :] - + joint_cam[human_model.lwrist_idx, None, :] + ) # left hand root-relative + joint_cam[human_model.joint_part['rhand'], :] = ( + joint_cam[human_model.joint_part['rhand'], :] - + joint_cam[human_model.rwrist_idx, None, :] + ) # right hand root-relative + joint_cam[human_model.joint_part['face'], :] = ( + joint_cam[human_model.joint_part['face'], :] - + joint_cam[human_model.neck_idx, None, :]) # face root-relative + + body_3d_size = 2 + output_hm_shape = (16, 16, 12) + joint_img[human_model.joint_part['body'], + 2] = ((joint_cam[human_model.joint_part['body'], 2].copy() / + (body_3d_size / 2) + 1) / 2.0 * output_hm_shape[0]) + joint_img[human_model.joint_part['lhand'], + 2] = ((joint_cam[human_model.joint_part['lhand'], 2].copy() / + (body_3d_size / 2) + 1) / 2.0 * output_hm_shape[0]) + joint_img[human_model.joint_part['rhand'], + 2] = ((joint_cam[human_model.joint_part['rhand'], 2].copy() / + (body_3d_size / 2) + 1) / 2.0 * output_hm_shape[0]) + joint_img[human_model.joint_part['face'], + 2] = ((joint_cam[human_model.joint_part['face'], 2].copy() / + (body_3d_size / 2) + 1) / 2.0 * output_hm_shape[0]) + + keypoints_2d = joint_img[:, :2].copy() + keypoints_3d = joint_img.copy() + keypoints_valid = coord_valid.reshape((-1, 1)) + + ann['keypoints'] = keypoints_2d.tolist() + ann['keypoints_3d'] = keypoints_3d.tolist() + ann['keypoints_valid'] = keypoints_valid.tolist() + ann['camera_param'] = cam_param + img['file_name'] = os.path.join(scene, file_name) + if video_name in splits: + val_annos.append(ann) + val_imgs.append(img) + else: + train_annos.append(ann) + train_imgs.append(img) + progress_bar.update() + + categories = [{ + 'supercategory': 'person', + 'id': 1, + 'name': 'person', + 'keypoints': human_model.joints_name, + 'skeleton': human_model.flip_pairs + }] + train_data = { + 'images': train_imgs, + 'annotations': train_annos, + 'categories': categories + } + val_data = { + 'images': val_imgs, + 'annotations': val_annos, + 'categories': categories + } + + mmengine.dump( + train_data, + osp.join(annotation_root, scene, 'train_3dkeypoint_annotation.json')) + mmengine.dump( + val_data, + osp.join(annotation_root, scene, 'val_3dkeypoint_annotation.json')) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data-root', type=str, default='data/UBody') + parser.add_argument('--human-model-path', type=str, default='data/SMPLX') + parser.add_argument( + '--nproc', default=8, type=int, help='number of process') + args = parser.parse_args() + + split_path = f'{args.data_root}/splits/intra_scene_test_list.npy' + annotation_path = f'{args.data_root}/annotations' + + folders = os.listdir(annotation_path) + folders = [f for f in folders if osp.isdir(osp.join(annotation_path, f))] + human_model_path = args.human_model_path + splits = np.load(split_path) + + if args.nproc > 1: + mmengine.track_parallel_progress( + partial( + process_scene_anno, + annotation_root=annotation_path, + splits=splits, + human_model_path=human_model_path), folders, args.nproc) + else: + mmengine.track_progress( + partial( + process_scene_anno, + annotation_root=annotation_path, + splits=splits, + human_model_path=human_model_path), folders)