diff --git a/configs/_base_/models/fcos3d.py b/configs/_base_/models/fcos3d.py index 92ea90760..1465b81a7 100644 --- a/configs/_base_/models/fcos3d.py +++ b/configs/_base_/models/fcos3d.py @@ -29,6 +29,7 @@ pred_attrs=True, pred_velo=True, dir_offset=0.7854, # pi/4 + dir_limit_offset=0, strides=[8, 16, 32, 64, 128], group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo cls_branch=(256, ), diff --git a/configs/_base_/models/hv_pointpillars_fpn_nus.py b/configs/_base_/models/hv_pointpillars_fpn_nus.py index e153f6c6e..be29269de 100644 --- a/configs/_base_/models/hv_pointpillars_fpn_nus.py +++ b/configs/_base_/models/hv_pointpillars_fpn_nus.py @@ -49,8 +49,8 @@ ranges=[[-50, -50, -1.8, 50, 50, -1.8]], scales=[1, 2, 4], sizes=[ - [0.8660, 2.5981, 1.], # 1.5/sqrt(3) - [0.5774, 1.7321, 1.], # 1/sqrt(3) + [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) + [1.7321, 0.5774, 1.], # 1 / sqrt(3) [1., 1., 1.], [0.4, 0.4, 1], ], @@ -59,8 +59,7 @@ reshape_out=True), assigner_per_size=False, diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 - dir_limit_offset=0, + dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), loss_cls=dict( type='FocalLoss', diff --git a/configs/_base_/models/hv_pointpillars_secfpn_kitti.py b/configs/_base_/models/hv_pointpillars_secfpn_kitti.py index 85076d079..bb2014729 100644 --- a/configs/_base_/models/hv_pointpillars_secfpn_kitti.py +++ b/configs/_base_/models/hv_pointpillars_secfpn_kitti.py @@ -41,7 +41,7 @@ [0, -39.68, -0.6, 70.4, 39.68, -0.6], [0, -39.68, -1.78, 70.4, 39.68, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/_base_/models/hv_pointpillars_secfpn_waymo.py b/configs/_base_/models/hv_pointpillars_secfpn_waymo.py index 14873ead4..30e23e956 100644 --- a/configs/_base_/models/hv_pointpillars_secfpn_waymo.py +++ b/configs/_base_/models/hv_pointpillars_secfpn_waymo.py @@ -48,15 +48,14 @@ [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], [-74.88, -74.88, 0, 74.88, 74.88, 0]], sizes=[ - [2.08, 4.73, 1.77], # car - [0.84, 1.81, 1.77], # cyclist - [0.84, 0.91, 1.74] # pedestrian + [4.73, 2.08, 1.77], # car + [1.81, 0.84, 1.77], # cyclist + [0.91, 0.84, 1.74] # pedestrian ], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 - dir_limit_offset=0, + dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), loss_cls=dict( type='FocalLoss', diff --git a/configs/_base_/models/hv_second_secfpn_kitti.py b/configs/_base_/models/hv_second_secfpn_kitti.py index 6bf18abe1..e7d569a52 100644 --- a/configs/_base_/models/hv_second_secfpn_kitti.py +++ b/configs/_base_/models/hv_second_secfpn_kitti.py @@ -37,7 +37,7 @@ [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/_base_/models/hv_second_secfpn_waymo.py b/configs/_base_/models/hv_second_secfpn_waymo.py index eb9bd3ae5..0fa39e150 100644 --- a/configs/_base_/models/hv_second_secfpn_waymo.py +++ b/configs/_base_/models/hv_second_secfpn_waymo.py @@ -42,15 +42,14 @@ [-76.8, -51.2, 0, 76.8, 51.2, 0], [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], sizes=[ - [2.08, 4.73, 1.77], # car - [0.84, 0.91, 1.74], # pedestrian - [0.84, 
1.81, 1.77] # cyclist + [4.73, 2.08, 1.77], # car + [0.91, 0.84, 1.74], # pedestrian + [1.81, 0.84, 1.77] # cyclist ], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 - dir_limit_offset=0, + dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), loss_cls=dict( type='FocalLoss', diff --git a/configs/_base_/models/parta2.py b/configs/_base_/models/parta2.py index 6c5ae9a66..aa1556789 100644 --- a/configs/_base_/models/parta2.py +++ b/configs/_base_/models/parta2.py @@ -38,7 +38,7 @@ ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78]], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/benchmark/hv_PartA2_secfpn_4x8_cyclic_80e_pcdet_kitti-3d-3class.py b/configs/benchmark/hv_PartA2_secfpn_4x8_cyclic_80e_pcdet_kitti-3d-3class.py index 19862097a..398a19cd2 100644 --- a/configs/benchmark/hv_PartA2_secfpn_4x8_cyclic_80e_pcdet_kitti-3d-3class.py +++ b/configs/benchmark/hv_PartA2_secfpn_4x8_cyclic_80e_pcdet_kitti-3d-3class.py @@ -38,7 +38,7 @@ ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78]], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/benchmark/hv_pointpillars_secfpn_3x8_100e_det3d_kitti-3d-car.py b/configs/benchmark/hv_pointpillars_secfpn_3x8_100e_det3d_kitti-3d-car.py index 42a31a217..72c737245 100644 --- a/configs/benchmark/hv_pointpillars_secfpn_3x8_100e_det3d_kitti-3d-car.py +++ b/configs/benchmark/hv_pointpillars_secfpn_3x8_100e_det3d_kitti-3d-car.py @@ -37,7 +37,7 @@ anchor_generator=dict( type='Anchor3DRangeGenerator', ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=True), diff_rad_by_sin=True, diff --git a/configs/benchmark/hv_pointpillars_secfpn_4x8_80e_pcdet_kitti-3d-3class.py b/configs/benchmark/hv_pointpillars_secfpn_4x8_80e_pcdet_kitti-3d-3class.py index 76ddd69a0..02eed9fb1 100644 --- a/configs/benchmark/hv_pointpillars_secfpn_4x8_80e_pcdet_kitti-3d-3class.py +++ b/configs/benchmark/hv_pointpillars_secfpn_4x8_80e_pcdet_kitti-3d-3class.py @@ -48,7 +48,7 @@ [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/benchmark/hv_second_secfpn_4x8_80e_pcdet_kitti-3d-3class.py b/configs/benchmark/hv_second_secfpn_4x8_80e_pcdet_kitti-3d-3class.py index 1f2b109bf..d61a050fb 100644 --- a/configs/benchmark/hv_second_secfpn_4x8_80e_pcdet_kitti-3d-3class.py +++ b/configs/benchmark/hv_second_secfpn_4x8_80e_pcdet_kitti-3d-3class.py @@ -39,7 +39,7 @@ [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), diff_rad_by_sin=True, diff --git a/configs/free_anchor/README.md b/configs/free_anchor/README.md index b38cf3c2f..e88c02487 100644 --- 
a/configs/free_anchor/README.md +++ b/configs/free_anchor/README.md @@ -49,8 +49,8 @@ model = dict( ranges=[[-50, -50, -1.8, 50, 50, -1.8]], scales=[1, 2, 4], sizes=[ - [0.8660, 2.5981, 1.], # 1.5/sqrt(3) - [0.5774, 1.7321, 1.], # 1/sqrt(3) + [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) + [1.7321, 0.5774, 1.], # 1 / sqrt(3) [1., 1., 1.], [0.4, 0.4, 1], ], @@ -59,8 +59,7 @@ model = dict( reshape_out=True), assigner_per_size=False, diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 - dir_limit_offset=0, + dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), loss_cls=dict( type='FocalLoss', diff --git a/configs/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py b/configs/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py index d0a989f12..7412b9308 100644 --- a/configs/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py +++ b/configs/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py @@ -21,8 +21,8 @@ ranges=[[-50, -50, -1.8, 50, 50, -1.8]], scales=[1, 2, 4], sizes=[ - [0.8660, 2.5981, 1.], # 1.5/sqrt(3) - [0.5774, 1.7321, 1.], # 1/sqrt(3) + [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) + [1.7321, 0.5774, 1.], # 1 / sqrt(3) [1., 1., 1.], [0.4, 0.4, 1], ], @@ -31,8 +31,7 @@ reshape_out=True), assigner_per_size=False, diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 - dir_limit_offset=0, + dir_offset=-0.7854, # -pi / 4 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), loss_cls=dict( type='FocalLoss', diff --git a/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py b/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py index 47932d7f6..06ebe62a2 100644 --- a/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py +++ b/configs/imvoxelnet/imvoxelnet_kitti-3d-car.py @@ -25,7 +25,7 @@ anchor_generator=dict( type='AlignedAnchor3DRangeGenerator', ranges=[[-0.16, -39.68, -1.78, 68.96, 39.68, -1.78]], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=True), diff_rad_by_sin=True, diff --git a/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py b/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py index 213b626dc..e9f592f5f 100644 --- a/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py +++ b/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py @@ -74,7 +74,7 @@ [0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -1.78, 70.4, 40.0, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False), assigner_per_size=True, diff --git a/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py b/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py index 91cf983d9..89be085d8 100644 --- a/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py +++ b/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py @@ -10,7 +10,7 @@ _delete_=True, type='Anchor3DRangeGenerator', ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False)), roi_head=dict( diff --git a/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py b/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py index 1e0f0faf9..50b89d6aa 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py @@ -10,7 +10,7 @@ 
_delete_=True, type='Anchor3DRangeGenerator', ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=True)), # model training and testing settings diff --git a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d.py b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d.py index 46d7b0672..1a0400eb3 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d.py @@ -29,15 +29,15 @@ [-80, -80, -0.9122268, 80, 80, -0.9122268], [-80, -80, -1.8012227, 80, 80, -1.8012227]], sizes=[ - [1.92, 4.75, 1.71], # car - [2.84, 10.24, 3.44], # truck - [2.92, 12.70, 3.42], # bus - [2.42, 6.52, 2.34], # emergency vehicle - [2.75, 8.17, 3.20], # other vehicle - [0.96, 2.35, 1.59], # motorcycle - [0.63, 1.76, 1.44], # bicycle - [0.76, 0.80, 1.76], # pedestrian - [0.35, 0.73, 0.50] # animal + [4.75, 1.92, 1.71], # car + [10.24, 2.84, 3.44], # truck + [12.70, 2.92, 3.42], # bus + [6.52, 2.42, 2.34], # emergency vehicle + [8.17, 2.75, 3.20], # other vehicle + [2.35, 0.96, 1.59], # motorcycle + [1.76, 0.63, 1.44], # bicycle + [0.80, 0.76, 1.76], # pedestrian + [0.73, 0.35, 0.50] # animal ], rotations=[0, 1.57], reshape_out=True))) diff --git a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py index 868c7ff8c..afff99c63 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py @@ -29,13 +29,13 @@ [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], ], sizes=[ - [1.95017717, 4.60718145, 1.72270761], # car - [2.4560939, 6.73778078, 2.73004906], # truck - [2.87427237, 12.01320693, 3.81509561], # trailer - [0.60058911, 1.68452161, 1.27192197], # bicycle - [0.66344886, 0.7256437, 1.75748069], # pedestrian - [0.39694519, 0.40359262, 1.06232151], # traffic_cone - [2.49008838, 0.48578221, 0.98297065], # barrier + [4.60718145, 1.95017717, 1.72270761], # car + [6.73778078, 2.4560939, 2.73004906], # truck + [12.01320693, 2.87427237, 3.81509561], # trailer + [1.68452161, 0.60058911, 1.27192197], # bicycle + [0.7256437, 0.66344886, 1.75748069], # pedestrian + [0.40359262, 0.39694519, 1.06232151], # traffic_cone + [0.48578221, 2.49008838, 0.98297065], # barrier ], custom_values=[0, 0], rotations=[0, 1.57], diff --git a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py index fa18aca2c..7964b7998 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py @@ -28,15 +28,15 @@ [-100, -100, -0.9122268, 100, 100, -0.9122268], [-100, -100, -1.8012227, 100, 100, -1.8012227]], sizes=[ - [1.92, 4.75, 1.71], # car - [2.84, 10.24, 3.44], # truck - [2.92, 12.70, 3.42], # bus - [2.42, 6.52, 2.34], # emergency vehicle - [2.75, 8.17, 3.20], # other vehicle - [0.96, 2.35, 1.59], # motorcycle - [0.63, 1.76, 1.44], # bicycle - [0.76, 0.80, 1.76], # pedestrian - [0.35, 0.73, 0.50] # animal + [4.75, 1.92, 1.71], # car + [10.24, 2.84, 3.44], # truck + [12.70, 2.92, 3.42], # bus + [6.52, 2.42, 2.34], # emergency vehicle + [8.17, 2.75, 3.20], # other vehicle + [2.35, 0.96, 1.59], # motorcycle + [1.76, 0.63, 1.44], # bicycle + [0.80, 0.76, 
1.76], # pedestrian + [0.73, 0.35, 0.50] # animal ], rotations=[0, 1.57], reshape_out=True))) diff --git a/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py b/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py index aeac750d9..90f2a42c5 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py @@ -17,7 +17,7 @@ anchor_generator=dict( type='AlignedAnchor3DRangeGenerator', ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]], - sizes=[[2.08, 4.73, 1.77]], + sizes=[[4.73, 2.08, 1.77]], rotations=[0, 1.57], reshape_out=True)), # model training and testing settings diff --git a/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py b/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py index 1fe32fd40..3a3e32669 100644 --- a/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py +++ b/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py @@ -14,7 +14,7 @@ anchor_generator=dict( type='AlignedAnchor3DRangeGenerator', ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]], - sizes=[[2.08, 4.73, 1.77]], + sizes=[[4.73, 2.08, 1.77]], rotations=[0, 1.57], reshape_out=True)), # model training and testing settings diff --git a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py index f1a3f4d82..fb330d785 100644 --- a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py +++ b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py @@ -25,15 +25,15 @@ [-80, -80, -0.9122268, 80, 80, -0.9122268], [-80, -80, -1.8012227, 80, 80, -1.8012227]], sizes=[ - [1.92, 4.75, 1.71], # car - [2.84, 10.24, 3.44], # truck - [2.92, 12.70, 3.42], # bus - [2.42, 6.52, 2.34], # emergency vehicle - [2.75, 8.17, 3.20], # other vehicle - [0.96, 2.35, 1.59], # motorcycle - [0.63, 1.76, 1.44], # bicycle - [0.76, 0.80, 1.76], # pedestrian - [0.35, 0.73, 0.50] # animal + [4.75, 1.92, 1.71], # car + [10.24, 2.84, 3.44], # truck + [12.70, 2.92, 3.42], # bus + [6.52, 2.42, 2.34], # emergency vehicle + [8.17, 2.75, 3.20], # other vehicle + [2.35, 0.96, 1.59], # motorcycle + [1.76, 0.63, 1.44], # bicycle + [0.80, 0.76, 1.76], # pedestrian + [0.73, 0.35, 0.50] # animal ], rotations=[0, 1.57], reshape_out=True))) diff --git a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py index 0f9e031f9..ef8996a18 100644 --- a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py +++ b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py @@ -25,13 +25,13 @@ [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], ], sizes=[ - [1.95017717, 4.60718145, 1.72270761], # car - [2.4560939, 6.73778078, 2.73004906], # truck - [2.87427237, 12.01320693, 3.81509561], # trailer - [0.60058911, 1.68452161, 1.27192197], # bicycle - [0.66344886, 0.7256437, 1.75748069], # pedestrian - [0.39694519, 0.40359262, 1.06232151], # traffic_cone - [2.49008838, 0.48578221, 0.98297065], # barrier + [4.60718145, 1.95017717, 1.72270761], # car + [6.73778078, 2.4560939, 2.73004906], # truck + [12.01320693, 2.87427237, 3.81509561], # trailer + [1.68452161, 0.60058911, 1.27192197], # bicycle + [0.7256437, 0.66344886, 1.75748069], # pedestrian + [0.40359262, 0.39694519, 
1.06232151], # traffic_cone + [0.48578221, 2.49008838, 0.98297065], # barrier ], custom_values=[0, 0], rotations=[0, 1.57], diff --git a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py index c7bc8f16e..2af3719c9 100644 --- a/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py +++ b/configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py @@ -26,15 +26,15 @@ [-100, -100, -0.9122268, 100, 100, -0.9122268], [-100, -100, -1.8012227, 100, 100, -1.8012227]], sizes=[ - [1.92, 4.75, 1.71], # car - [2.84, 10.24, 3.44], # truck - [2.92, 12.70, 3.42], # bus - [2.42, 6.52, 2.34], # emergency vehicle - [2.75, 8.17, 3.20], # other vehicle - [0.96, 2.35, 1.59], # motorcycle - [0.63, 1.76, 1.44], # bicycle - [0.76, 0.80, 1.76], # pedestrian - [0.35, 0.73, 0.50] # animal + [4.75, 1.92, 1.71], # car + [10.24, 2.84, 3.44], # truck + [12.70, 2.92, 3.42], # bus + [6.52, 2.42, 2.34], # emergency vehicle + [8.17, 2.75, 3.20], # other vehicle + [2.35, 0.96, 1.59], # motorcycle + [1.76, 0.63, 1.44], # bicycle + [0.80, 0.76, 1.76], # pedestrian + [0.73, 0.35, 0.50] # animal ], rotations=[0, 1.57], reshape_out=True))) diff --git a/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py b/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py index c4f2ffd51..9ab7350ac 100644 --- a/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py +++ b/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py @@ -12,7 +12,7 @@ _delete_=True, type='Anchor3DRangeGenerator', ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=True)), # model training and testing settings diff --git a/configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py b/configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py index aae54b33a..6412f535d 100644 --- a/configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py +++ b/configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py @@ -21,7 +21,10 @@ classes=class_names, sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), points_loader=dict( - type='LoadPointsFromFile', load_dim=5, use_dim=[0, 1, 2, 3, 4])) + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=5, + use_dim=[0, 1, 2, 3, 4])) train_pipeline = [ dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5), diff --git a/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py b/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py index 8a70d2a77..50b33c801 100644 --- a/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py +++ b/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py @@ -96,15 +96,15 @@ [-100, -100, -0.6276341, 100, 100, -0.6276341], [-100, -100, -0.3033737, 100, 100, -0.3033737]], sizes=[ - [0.63, 1.76, 1.44], # bicycle - [0.96, 2.35, 1.59], # motorcycle - [0.76, 0.80, 1.76], # pedestrian - [0.35, 0.73, 0.50], # animal - [1.92, 4.75, 1.71], # car - [2.42, 6.52, 2.34], # emergency vehicle - [2.92, 12.70, 3.42], # bus - [2.75, 8.17, 3.20], # other vehicle - [2.84, 10.24, 3.44] # truck + [1.76, 0.63, 1.44], # bicycle + [2.35, 0.96, 1.59], # motorcycle + [0.80, 0.76, 1.76], # pedestrian + [0.73, 0.35, 0.50], # animal + [4.75, 1.92, 1.71], # car + [6.52, 2.42, 2.34], # emergency vehicle + [12.70, 2.92, 3.42], # bus + [8.17, 2.75, 3.20], # other vehicle + [10.24, 2.84, 3.44] # truck ], custom_values=[], rotations=[0, 
1.57], @@ -137,7 +137,7 @@ ], assign_per_class=True, diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 + dir_offset=-0.7854, # -pi/4 dir_limit_offset=0, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), loss_cls=dict( diff --git a/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py b/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py index 18b658b0c..855020141 100644 --- a/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py +++ b/configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py @@ -94,16 +94,16 @@ [-50, -50, -1.80673031, 50, 50, -1.80673031], [-50, -50, -1.64824291, 50, 50, -1.64824291]], sizes=[ - [0.60058911, 1.68452161, 1.27192197], # bicycle - [0.76279481, 2.09973778, 1.44403034], # motorcycle - [0.66344886, 0.72564370, 1.75748069], # pedestrian - [0.39694519, 0.40359262, 1.06232151], # traffic cone - [2.49008838, 0.48578221, 0.98297065], # barrier - [1.95017717, 4.60718145, 1.72270761], # car - [2.45609390, 6.73778078, 2.73004906], # truck - [2.87427237, 12.01320693, 3.81509561], # trailer - [2.94046906, 11.1885991, 3.47030982], # bus - [2.73050468, 6.38352896, 3.13312415] # construction vehicle + [1.68452161, 0.60058911, 1.27192197], # bicycle + [2.09973778, 0.76279481, 1.44403034], # motorcycle + [0.72564370, 0.66344886, 1.75748069], # pedestrian + [0.40359262, 0.39694519, 1.06232151], # traffic cone + [0.48578221, 2.49008838, 0.98297065], # barrier + [4.60718145, 1.95017717, 1.72270761], # car + [6.73778078, 2.45609390, 2.73004906], # truck + [12.01320693, 2.87427237, 3.81509561], # trailer + [11.1885991, 2.94046906, 3.47030982], # bus + [6.38352896, 2.73050468, 3.13312415] # construction vehicle ], custom_values=[0, 0], rotations=[0, 1.57], @@ -144,7 +144,7 @@ ], assign_per_class=True, diff_rad_by_sin=True, - dir_offset=0.7854, # pi/4 + dir_offset=-0.7854, # -pi/4 dir_limit_offset=0, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), loss_cls=dict( diff --git a/mmdet3d/apis/inference.py b/mmdet3d/apis/inference.py index 78dfa1e03..f23a66b0f 100644 --- a/mmdet3d/apis/inference.py +++ b/mmdet3d/apis/inference.py @@ -7,7 +7,7 @@ from mmcv.runner import load_checkpoint from os import path as osp -from mmdet3d.core import (Box3DMode, DepthInstance3DBoxes, +from mmdet3d.core import (Box3DMode, Coord3DMode, DepthInstance3DBoxes, LiDARInstance3DBoxes, show_multi_modality_result, show_result, show_seg_result) from mmdet3d.core.bbox import get_box_type @@ -315,8 +315,7 @@ def show_det_result_meshlab(data, # for now we convert points into depth mode box_mode = data['img_metas'][0][0]['box_mode_3d'] if box_mode != Box3DMode.DEPTH: - points = points[..., [1, 0, 2]] - points[..., 0] *= -1 + points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH) show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH) else: show_bboxes = deepcopy(pred_bboxes) diff --git a/mmdet3d/core/anchor/anchor_3d_generator.py b/mmdet3d/core/anchor/anchor_3d_generator.py index f9343ef99..752a5cbcb 100644 --- a/mmdet3d/core/anchor/anchor_3d_generator.py +++ b/mmdet3d/core/anchor/anchor_3d_generator.py @@ -31,7 +31,7 @@ class Anchor3DRangeGenerator(object): def __init__(self, ranges, - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], scales=[1], rotations=[0, 1.5707963], custom_values=(), @@ -148,7 +148,7 @@ def anchors_single_range(self, feature_size, anchor_range, scale=1, - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.5707963], device='cuda'): """Generate anchors in a single range. 
@@ -244,7 +244,7 @@ def anchors_single_range(self, feature_size, anchor_range, scale, - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.5707963], device='cuda'): """Generate anchors in a single range. diff --git a/mmdet3d/core/bbox/box_np_ops.py b/mmdet3d/core/bbox/box_np_ops.py index 256c436a4..c8d55d665 100644 --- a/mmdet3d/core/bbox/box_np_ops.py +++ b/mmdet3d/core/bbox/box_np_ops.py @@ -1,13 +1,17 @@ # TODO: clean the functions in this file and move the APIs into box structures # in the future - import numba import numpy as np +from .structures.utils import limit_period, points_cam2img, rotation_3d_in_axis + def camera_to_lidar(points, r_rect, velo2cam): """Convert points in camera coordinate to lidar coordinate. + Note: + This function is for KITTI only. + Args: points (np.ndarray, shape=[N, 3]): Points in camera coordinate. r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in @@ -26,7 +30,10 @@ def camera_to_lidar(points, r_rect, velo2cam): def box_camera_to_lidar(data, r_rect, velo2cam): - """Covert boxes in camera coordinate to lidar coordinate. + """Convert boxes in camera coordinate to lidar coordinate. + + Note: + This function is for KITTI only. Args: data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate. @@ -39,10 +46,13 @@ def box_camera_to_lidar(data, r_rect, velo2cam): np.ndarray, shape=[N, 3]: Boxes in lidar coordinate. """ xyz = data[:, 0:3] - l, h, w = data[:, 3:4], data[:, 4:5], data[:, 5:6] + dx, dy, dz = data[:, 3:4], data[:, 4:5], data[:, 5:6] r = data[:, 6:7] xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam) - return np.concatenate([xyz_lidar, w, l, h, r], axis=1) + # yaw and dims also needs to be converted + r_new = -r - np.pi / 2 + r_new = limit_period(r_new, period=np.pi * 2) + return np.concatenate([xyz_lidar, dx, dz, dy, r_new], axis=1) def corners_nd(dims, origin=0.5): @@ -79,23 +89,6 @@ def corners_nd(dims, origin=0.5): return corners -def rotation_2d(points, angles): - """Rotation 2d points based on origin point clockwise when angle positive. - - Args: - points (np.ndarray): Points to be rotated with shape \ - (N, point_size, 2). - angles (np.ndarray): Rotation angle with shape (N). - - Returns: - np.ndarray: Same shape as points. - """ - rot_sin = np.sin(angles) - rot_cos = np.cos(angles) - rot_mat_T = np.stack([[rot_cos, -rot_sin], [rot_sin, rot_cos]]) - return np.einsum('aij,jka->aik', points, rot_mat_T) - - def center_to_corner_box2d(centers, dims, angles=None, origin=0.5): """Convert kitti locations, dimensions and angles to corners. format: center(xy), dims(xy), angles(clockwise when positive) @@ -117,7 +110,7 @@ def center_to_corner_box2d(centers, dims, angles=None, origin=0.5): corners = corners_nd(dims, origin=origin) # corners: [N, 4, 2] if angles is not None: - corners = rotation_2d(corners, angles) + corners = rotation_3d_in_axis(corners, angles) corners += centers.reshape([-1, 1, 2]) return corners @@ -171,37 +164,6 @@ def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam): return lidar_points -def rotation_3d_in_axis(points, angles, axis=0): - """Rotate points in specific axis. - - Args: - points (np.ndarray, shape=[N, point_size, 3]]): - angles (np.ndarray, shape=[N]]): - axis (int, optional): Axis to rotate at. Defaults to 0. - - Returns: - np.ndarray: Rotated points. 
- """ - # points: [N, point_size, 3] - rot_sin = np.sin(angles) - rot_cos = np.cos(angles) - ones = np.ones_like(rot_cos) - zeros = np.zeros_like(rot_cos) - if axis == 1: - rot_mat_T = np.stack([[rot_cos, zeros, -rot_sin], [zeros, ones, zeros], - [rot_sin, zeros, rot_cos]]) - elif axis == 2 or axis == -1: - rot_mat_T = np.stack([[rot_cos, -rot_sin, zeros], - [rot_sin, rot_cos, zeros], [zeros, zeros, ones]]) - elif axis == 0: - rot_mat_T = np.stack([[zeros, rot_cos, -rot_sin], - [zeros, rot_sin, rot_cos], [ones, zeros, zeros]]) - else: - raise ValueError('axis should in range') - - return np.einsum('aij,jka->aik', points, rot_mat_T) - - def center_to_corner_box3d(centers, dims, angles=None, @@ -258,8 +220,8 @@ def box2d_to_corner_jit(boxes): rot_sin = np.sin(boxes[i, -1]) rot_cos = np.cos(boxes[i, -1]) rot_mat_T[0, 0] = rot_cos - rot_mat_T[0, 1] = -rot_sin - rot_mat_T[1, 0] = rot_sin + rot_mat_T[0, 1] = rot_sin + rot_mat_T[1, 0] = -rot_sin rot_mat_T[1, 1] = rot_cos box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2] return box_corners @@ -326,15 +288,15 @@ def rotation_points_single_angle(points, angle, axis=0): rot_cos = np.cos(angle) if axis == 1: rot_mat_T = np.array( - [[rot_cos, 0, -rot_sin], [0, 1, 0], [rot_sin, 0, rot_cos]], + [[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]], dtype=points.dtype) elif axis == 2 or axis == -1: rot_mat_T = np.array( - [[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]], + [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]], dtype=points.dtype) elif axis == 0: rot_mat_T = np.array( - [[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]], + [[1, 0, 0], [0, rot_cos, rot_sin], [0, -rot_sin, rot_cos]], dtype=points.dtype) else: raise ValueError('axis should in range') @@ -342,44 +304,6 @@ def rotation_points_single_angle(points, angle, axis=0): return points @ rot_mat_T, rot_mat_T -def points_cam2img(points_3d, proj_mat, with_depth=False): - """Project points in camera coordinates to image coordinates. - - Args: - points_3d (np.ndarray): Points in shape (N, 3) - proj_mat (np.ndarray): Transformation matrix between coordinates. - with_depth (bool, optional): Whether to keep depth in the output. - Defaults to False. - - Returns: - np.ndarray: Points in image coordinates with shape [N, 2]. - """ - points_shape = list(points_3d.shape) - points_shape[-1] = 1 - - assert len(proj_mat.shape) == 2, 'The dimension of the projection'\ - f' matrix should be 2 instead of {len(proj_mat.shape)}.' - d1, d2 = proj_mat.shape[:2] - assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or ( - d1 == 4 and d2 == 4), 'The shape of the projection matrix'\ - f' ({d1}*{d2}) is not supported.' - if d1 == 3: - proj_mat_expanded = np.eye(4, dtype=proj_mat.dtype) - proj_mat_expanded[:d1, :d2] = proj_mat - proj_mat = proj_mat_expanded - - points_4 = np.concatenate([points_3d, np.ones(points_shape)], axis=-1) - point_2d = points_4 @ proj_mat.T - point_2d_res = point_2d[..., :2] / point_2d[..., 2:3] - - if with_depth: - points_2d_depth = np.concatenate([point_2d_res, point_2d[..., 2:3]], - axis=-1) - return points_2d_depth - - return point_2d_res - - def box3d_to_bbox(box3d, P2): """Convert box3d in camera coordinates to bbox in image coordinates. @@ -460,25 +384,9 @@ def minmax_to_corner_2d(minmax_box): return center_to_corner_box2d(center, dims, origin=0.0) -def limit_period(val, offset=0.5, period=np.pi): - """Limit the value into a period for periodic function. - - Args: - val (np.ndarray): The value to be converted. 
- offset (float, optional): Offset to set the value range. \ - Defaults to 0.5. - period (float, optional): Period of the value. Defaults to np.pi. - - Returns: - torch.Tensor: Value in the range of \ - [-offset * period, (1-offset) * period] - """ - return val - np.floor(val / period + offset) * period - - def create_anchors_3d_range(feature_size, anchor_range, - sizes=((1.6, 3.9, 1.56), ), + sizes=((3.9, 1.6, 1.56), ), rotations=(0, np.pi / 2), dtype=np.float32): """Create anchors 3d by range. @@ -491,14 +399,14 @@ def create_anchors_3d_range(feature_size, (x_min, y_min, z_min, x_max, y_max, z_max). sizes (list[list] | np.ndarray | torch.Tensor, optional): Anchor size with shape [N, 3], in order of x, y, z. - Defaults to ((1.6, 3.9, 1.56), ). + Defaults to ((3.9, 1.6, 1.56), ). rotations (list[float] | np.ndarray | torch.Tensor, optional): Rotations of anchors in a single feature grid. Defaults to (0, np.pi / 2). dtype (type, optional): Data type. Default to np.float32. Returns: - np.ndarray: Range based anchors with shape of \ + np.ndarray: Range based anchors with shape of (*feature_size, num_sizes, num_rots, 7). """ anchor_range = np.array(anchor_range, dtype) @@ -549,7 +457,7 @@ def rbbox2d_to_near_bbox(rbboxes): """convert rotated bbox to nearest 'standing' or 'lying' bbox. Args: - rbboxes (np.ndarray): Rotated bboxes with shape of \ + rbboxes (np.ndarray): Rotated bboxes with shape of (N, 5(x, y, xdim, ydim, rad)). Returns: @@ -840,8 +748,8 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True): Args: boxes3d (np.ndarray): Boxes with shape of (N, 7) - [x, y, z, w, l, h, ry] in LiDAR coords, see the definition of ry - in KITTI dataset. + [x, y, z, dx, dy, dz, ry] in LiDAR coords, see the definition of + ry in KITTI dataset. bottom_center (bool, optional): Whether z is on the bottom center of object. Defaults to True. @@ -849,19 +757,25 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True): np.ndarray: Box corners with the shape of [N, 8, 3]. """ boxes_num = boxes3d.shape[0] - w, l, h = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] - x_corners = np.array( - [w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], - dtype=np.float32).T - y_corners = np.array( - [-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.], - dtype=np.float32).T + dx, dy, dz = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] + x_corners = np.array([ + dx / 2., -dx / 2., -dx / 2., dx / 2., dx / 2., -dx / 2., -dx / 2., + dx / 2. + ], + dtype=np.float32).T + y_corners = np.array([ + -dy / 2., -dy / 2., dy / 2., dy / 2., -dy / 2., -dy / 2., dy / 2., + dy / 2. + ], + dtype=np.float32).T if bottom_center: z_corners = np.zeros((boxes_num, 8), dtype=np.float32) - z_corners[:, 4:8] = h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) + z_corners[:, 4:8] = dz.reshape(boxes_num, 1).repeat( + 4, axis=1) # (N, 8) else: z_corners = np.array([ - -h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2. + -dz / 2., -dz / 2., -dz / 2., -dz / 2., dz / 2., dz / 2., dz / 2., + dz / 2. 
], dtype=np.float32).T @@ -869,9 +783,9 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True): zeros, ones = np.zeros( ry.size, dtype=np.float32), np.ones( ry.size, dtype=np.float32) - rot_list = np.array([[np.cos(ry), -np.sin(ry), zeros], - [np.sin(ry), np.cos(ry), zeros], [zeros, zeros, - ones]]) # (3, 3, N) + rot_list = np.array([[np.cos(ry), np.sin(ry), zeros], + [-np.sin(ry), np.cos(ry), zeros], + [zeros, zeros, ones]]) # (3, 3, N) R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) temp_corners = np.concatenate((x_corners.reshape( diff --git a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py index bbc344775..6a930afe4 100644 --- a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py +++ b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py @@ -30,8 +30,10 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): between each aligned pair of bboxes1 and bboxes2. Args: - bboxes1 (torch.Tensor): shape (N, 7+N) [x, y, z, h, w, l, ry, v]. - bboxes2 (torch.Tensor): shape (M, 7+N) [x, y, z, h, w, l, ry, v]. + bboxes1 (torch.Tensor): shape (N, 7+N) + [x, y, z, dx, dy, dz, ry, v]. + bboxes2 (torch.Tensor): shape (M, 7+N) + [x, y, z, dx, dy, dz, ry, v]. mode (str): "iou" (intersection over union) or iof (intersection over foreground). is_aligned (bool): Whether the calculation is aligned. @@ -73,8 +75,8 @@ def __call__(self, bboxes1, bboxes2, mode='iou'): calculate the actual 3D IoUs of boxes. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry]. + bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry]. + bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry]. mode (str): "iou" (intersection over union) or iof (intersection over foreground). @@ -109,8 +111,8 @@ def bbox_overlaps_nearest_3d(bboxes1, aligned pair of bboxes1 and bboxes2. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry, v]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry, v]. + bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry, v]. + bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry, v]. mode (str): "iou" (intersection over union) or iof (intersection over foreground). is_aligned (bool): Whether the calculation is aligned @@ -147,8 +149,8 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'): calculate the actual IoUs of boxes. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry]. + bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry]. + bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry]. mode (str): "iou" (intersection over union) or iof (intersection over foreground). coordinate (str): 'camera' or 'lidar' coordinate system. diff --git a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py index 8a309ce2f..955f1546d 100644 --- a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py +++ b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py @@ -8,8 +8,8 @@ class IoUNegPiecewiseSampler(RandomSampler): """IoU Piece-wise Sampling. - Sampling negtive proposals according to a list of IoU thresholds. - The negtive proposals are divided into several pieces according + Sampling negative proposals according to a list of IoU thresholds. 
+ The negative proposals are divided into several pieces according to `neg_iou_piece_thrs`. And the ratio of each piece is indicated by `neg_piece_fractions`. @@ -17,11 +17,11 @@ class IoUNegPiecewiseSampler(RandomSampler): num (int): Number of proposals. pos_fraction (float): The fraction of positive proposals. neg_piece_fractions (list): A list contains fractions that indicates - the ratio of each piece of total negtive samplers. + the ratio of each piece of total negative samplers. neg_iou_piece_thrs (list): A list contains IoU thresholds that indicate the upper bound of this piece. neg_pos_ub (float): The total ratio to limit the upper bound - number of negtive samples. + number of negative samples. add_gt_as_proposals (bool): Whether to add gt as proposals. """ diff --git a/mmdet3d/core/bbox/structures/base_box3d.py b/mmdet3d/core/bbox/structures/base_box3d.py index 1c6549532..b3ee156e8 100644 --- a/mmdet3d/core/bbox/structures/base_box3d.py +++ b/mmdet3d/core/bbox/structures/base_box3d.py @@ -2,6 +2,7 @@ import torch from abc import abstractmethod +from mmdet3d.ops import points_in_boxes_batch, points_in_boxes_gpu from mmdet3d.ops.iou3d import iou3d_cuda from .utils import limit_period, xywhr2xyxyr @@ -130,8 +131,8 @@ def corners(self): @abstractmethod def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle or \ - rotation matrix. + """Rotate boxes with points (optional) with the given angle or rotation + matrix. Args: angle (float | torch.Tensor | np.ndarray): @@ -169,7 +170,7 @@ def in_range_3d(self, box_range): polygon, we try to reduce the burden for simpler cases. Returns: - torch.Tensor: A binary vector indicating whether each box is \ + torch.Tensor: A binary vector indicating whether each box is inside the reference range. """ in_range_flags = ((self.tensor[:, 0] > box_range[0]) @@ -189,7 +190,7 @@ def in_range_bev(self, box_range): in order of (x_min, y_min, x_max, y_max). Returns: - torch.Tensor: Indicating whether each box is inside \ + torch.Tensor: Indicating whether each box is inside the reference range. """ pass @@ -207,7 +208,7 @@ def convert_to(self, dst, rt_mat=None): to LiDAR. This requires a transformation matrix. Returns: - :obj:`BaseInstance3DBoxes`: The converted box of the same type \ + :obj:`BaseInstance3DBoxes`: The converted box of the same type in the `dst` mode. """ pass @@ -240,7 +241,7 @@ def nonempty(self, threshold: float = 0.0): threshold (float): The threshold of minimal sizes. Returns: - torch.Tensor: A binary vector which represents whether each \ + torch.Tensor: A binary vector which represents whether each box is empty (False) or non-empty (True). """ box = self.tensor @@ -266,8 +267,8 @@ def __getitem__(self, item): subject to Pytorch's indexing semantics. Returns: - :obj:`BaseInstance3DBoxes`: A new object of \ - :class:`BaseInstances3DBoxes` after indexing. + :obj:`BaseInstance3DBoxes`: A new object of + :class:`BaseInstance3DBoxes` after indexing. """ original_type = type(self) if isinstance(item, int): @@ -318,7 +319,7 @@ def to(self, device): device (str | :obj:`torch.device`): The name of the device. Returns: - :obj:`BaseInstance3DBoxes`: A new boxes object on the \ + :obj:`BaseInstance3DBoxes`: A new boxes object on the specific device. """ original_type = type(self) @@ -331,7 +332,7 @@ def clone(self): """Clone the Boxes. Returns: - :obj:`BaseInstance3DBoxes`: Box object with the same properties \ + :obj:`BaseInstance3DBoxes`: Box object with the same properties as self. 
""" original_type = type(self) @@ -443,14 +444,14 @@ def overlaps(cls, boxes1, boxes2, mode='iou'): def new_box(self, data): """Create a new box object with data. - The new box and its tensor has the similar properties \ + The new box and its tensor has the similar properties as self and self.tensor, respectively. Args: data (torch.Tensor | numpy.array | list): Data to be copied. Returns: - :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, \ + :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, the object's other properties are similar to ``self``. """ new_tensor = self.tensor.new_tensor(data) \ @@ -458,3 +459,48 @@ def new_box(self, data): original_type = type(self) return original_type( new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw) + + def points_in_boxes(self, points, boxes_override=None): + """Find the box which the points are in. + + Args: + points (torch.Tensor): Points in shape (N, 3). + + Returns: + torch.Tensor: The index of box where each point are in. + """ + if boxes_override is not None: + boxes = boxes_override + else: + boxes = self.tensor + box_idx = points_in_boxes_gpu( + points.unsqueeze(0), + boxes.unsqueeze(0).to(points.device)).squeeze(0) + return box_idx + + def points_in_boxes_batch(self, points, boxes_override=None): + """Find points that are in boxes (CUDA). + + Args: + points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], + 3 dimensions are [x, y, z] in LiDAR coordinate. + + Returns: + torch.Tensor: The index of boxes each point lies in with shape + of (B, M, T). + """ + if boxes_override is not None: + boxes = boxes_override + else: + boxes = self.tensor + + points_clone = points.clone()[..., :3] + if points_clone.dim() == 2: + points_clone = points_clone.unsqueeze(0) + else: + assert points_clone.dim() == 3 and points_clone.shape[0] == 1 + + boxes = boxes.to(points_clone.device).unsqueeze(0) + box_idxs_of_pts = points_in_boxes_batch(points_clone, boxes) + + return box_idxs_of_pts.squeeze(0) diff --git a/mmdet3d/core/bbox/structures/box_3d_mode.py b/mmdet3d/core/bbox/structures/box_3d_mode.py index 0b318c36f..0e98cd811 100644 --- a/mmdet3d/core/bbox/structures/box_3d_mode.py +++ b/mmdet3d/core/bbox/structures/box_3d_mode.py @@ -6,6 +6,7 @@ from .cam_box3d import CameraInstance3DBoxes from .depth_box3d import DepthInstance3DBoxes from .lidar_box3d import LiDARInstance3DBoxes +from .utils import limit_period @unique @@ -60,12 +61,12 @@ class Box3DMode(IntEnum): DEPTH = 2 @staticmethod - def convert(box, src, dst, rt_mat=None): + def convert(box, src, dst, rt_mat=None, with_yaw=True): """Convert boxes from `src` mode to `dst` mode. Args: box (tuple | list | np.ndarray | - torch.Tensor | BaseInstance3DBoxes): + torch.Tensor | :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. src (:obj:`Box3DMode`): The src Box mode. dst (:obj:`Box3DMode`): The target Box mode. @@ -74,9 +75,13 @@ def convert(box, src, dst, rt_mat=None): The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. + with_yaw (bool): If `box` is an instance of + :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle. + Defaults to True. Returns: - (tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes): \ + (tuple | list | np.ndarray | torch.Tensor | + :obj:`BaseInstance3DBoxes`): The converted box of the same type. 
""" if src == dst: @@ -99,32 +104,53 @@ def convert(box, src, dst, rt_mat=None): else: arr = box.clone() + if is_Instance3DBoxes: + with_yaw = box.with_yaw + # convert box from `src` mode to `dst` mode. x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6] + if with_yaw: + yaw = arr[..., 6:7] if src == Box3DMode.LIDAR and dst == Box3DMode.CAM: if rt_mat is None: rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) - xyz_size = torch.cat([y_size, z_size, x_size], dim=-1) + xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) + if with_yaw: + yaw = -yaw - np.pi / 2 + yaw = limit_period(yaw, period=np.pi * 2) elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR: if rt_mat is None: rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]]) - xyz_size = torch.cat([z_size, x_size, y_size], dim=-1) + xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) + if with_yaw: + yaw = -yaw - np.pi / 2 + yaw = limit_period(yaw, period=np.pi * 2) elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM: if rt_mat is None: rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) + if with_yaw: + yaw = -yaw elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH: if rt_mat is None: rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) + if with_yaw: + yaw = -yaw elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH: if rt_mat is None: rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]]) - xyz_size = torch.cat([y_size, x_size, z_size], dim=-1) + xyz_size = torch.cat([x_size, y_size, z_size], dim=-1) + if with_yaw: + yaw = yaw + np.pi / 2 + yaw = limit_period(yaw, period=np.pi * 2) elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR: if rt_mat is None: rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]]) - xyz_size = torch.cat([y_size, x_size, z_size], dim=-1) + xyz_size = torch.cat([x_size, y_size, z_size], dim=-1) + if with_yaw: + yaw = yaw - np.pi / 2 + yaw = limit_period(yaw, period=np.pi * 2) else: raise NotImplementedError( f'Conversion from Box3DMode {src} to {dst} ' @@ -134,13 +160,17 @@ def convert(box, src, dst, rt_mat=None): rt_mat = arr.new_tensor(rt_mat) if rt_mat.size(1) == 4: extended_xyz = torch.cat( - [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1) + [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1) xyz = extended_xyz @ rt_mat.t() else: - xyz = arr[:, :3] @ rt_mat.t() + xyz = arr[..., :3] @ rt_mat.t() - remains = arr[..., 6:] - arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1) + if with_yaw: + remains = arr[..., 7:] + arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1) + else: + remains = arr[..., 6:] + arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1) # convert arr to the original type original_type = type(box) @@ -159,7 +189,6 @@ def convert(box, src, dst, rt_mat=None): raise NotImplementedError( f'Conversion to {dst} through {original_type}' ' is not supported yet') - return target_type( - arr, box_dim=arr.size(-1), with_yaw=box.with_yaw) + return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw) else: return arr diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py index 9969037f1..7ac29619e 100644 --- a/mmdet3d/core/bbox/structures/cam_box3d.py +++ b/mmdet3d/core/bbox/structures/cam_box3d.py @@ -1,7 +1,7 @@ import numpy as np import torch -from mmdet3d.core.points import BasePoints +from ...points import BasePoints from .base_box3d import 
BaseInstance3DBoxes from .utils import limit_period, rotation_3d_in_axis @@ -37,6 +37,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): with_yaw (bool): If True, the value of yaw will be set to 0 as minmax boxes. """ + YAW_AXIS = 1 def __init__(self, tensor, @@ -116,16 +117,16 @@ def corners(self): / | / | (x0, y0, z0) + ----------- + + (x1, y1, z1) | / . | / - | / oriign | / + | / origin | / (x0, y1, z0) + ----------- + -------> x right | (x1, y1, z0) | v down y """ - # TODO: rotation_3d_in_axis function do not support - # empty tensor currently. - assert len(self.tensor) != 0 + if self.tensor.numel() == 0: + return torch.empty([0, 8, 3], device=self.tensor.device) + dims = self.dims corners_norm = torch.from_numpy( np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to( @@ -136,8 +137,11 @@ def corners(self): corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5]) corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3]) - # rotate around y axis - corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=1) + # positive direction of the gravity axis + # in cam coord system points to the earth + # so the rotation is clockwise if viewed from above + corners = rotation_3d_in_axis( + corners, self.tensor[:, 6], axis=self.YAW_AXIS, clockwise=True) corners += self.tensor[:, :3].view(-1, 1, 3) return corners @@ -145,7 +149,12 @@ def corners(self): def bev(self): """torch.Tensor: A n x 5 tensor of 2D BEV box of each box with rotation in XYWHR format.""" - return self.tensor[:, [0, 2, 3, 5, 6]] + bev = self.tensor[:, [0, 2, 3, 5, 6]].clone() + # positive direction of the gravity axis + # in cam coord system points to the earth + # so the bev yaw angle needs to be reversed + bev[:, -1] = -bev[:, -1] + return bev @property def nearest_bev(self): @@ -169,8 +178,8 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle or \ - rotation matrix. + """Rotate boxes with points (optional) with the given angle or rotation + matrix. Args: angle (float | torch.Tensor | np.ndarray): @@ -179,39 +188,43 @@ def rotate(self, angle, points=None): Points to rotate. Defaults to None. Returns: - tuple or None: When ``points`` is None, the function returns \ - None, otherwise it returns the rotated points and the \ + tuple or None: When ``points`` is None, the function returns + None, otherwise it returns the rotated points and the rotation matrix ``rot_mat_T``. 
""" if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], - [0, 1, 0], - [rot_sin, 0, rot_cos]]) + self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis( + self.tensor[:, 0:3], + angle, + axis=self.YAW_AXIS, + return_mat=True, + # positive direction of the gravity axis + # in cam coord system points to the earth + # so the rotation is clockwise if viewed from above + clockwise=True) else: rot_mat_T = angle rot_sin = rot_mat_T[2, 0] rot_cos = rot_mat_T[0, 0] angle = np.arctan2(rot_sin, rot_cos) + self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T - self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, 6] += angle if points is not None: if isinstance(points, torch.Tensor): points[:, :3] = points[:, :3] @ rot_mat_T elif isinstance(points, np.ndarray): - rot_mat_T = rot_mat_T.numpy() + rot_mat_T = rot_mat_T.cpu().numpy() points[:, :3] = np.dot(points[:, :3], rot_mat_T) elif isinstance(points, BasePoints): - # clockwise - points.rotate(-angle) + points.rotate(rot_mat_T) else: raise ValueError return points, rot_mat_T @@ -263,7 +276,7 @@ def in_range_bev(self, box_range): polygon, we reduce the burden for simpler cases. Returns: - torch.Tensor: Indicating whether each box is inside \ + torch.Tensor: Indicating whether each box is inside the reference range. """ in_range_flags = ((self.tensor[:, 0] > box_range[0]) @@ -295,8 +308,8 @@ def height_overlaps(cls, boxes1, boxes2, mode='iou'): boxes2_top_height = boxes2.top_height.view(1, -1) boxes2_bottom_height = boxes2.bottom_height.view(1, -1) - # In camera coordinate system - # from up to down is the positive direction + # positive direction of the gravity axis + # in cam coord system points to the earth heighest_of_bottom = torch.min(boxes1_bottom_height, boxes2_bottom_height) lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height) @@ -315,9 +328,50 @@ def convert_to(self, dst, rt_mat=None): to LiDAR. This requires a transformation matrix. Returns: - :obj:`BaseInstance3DBoxes`: \ + :obj:`BaseInstance3DBoxes`: The converted box of the same type in the ``dst`` mode. """ from .box_3d_mode import Box3DMode return Box3DMode.convert( box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat) + + def points_in_boxes(self, points): + """Find the box which the points are in. + + Args: + points (torch.Tensor): Points in shape (N, 3). + + Returns: + torch.Tensor: The index of box where each point are in. + """ + from .coord_3d_mode import Coord3DMode + + points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM, + Coord3DMode.LIDAR) + boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, + Coord3DMode.LIDAR) + + box_idx = super().points_in_boxes(self, points_lidar, boxes_lidar) + return box_idx + + def points_in_boxes_batch(self, points): + """Find points that are in boxes (CUDA). + + Args: + points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], + 3 dimensions are [x, y, z] in LiDAR coordinate. + + Returns: + torch.Tensor: The index of boxes each point lies in with shape + of (B, M, T). 
+ """ + from .coord_3d_mode import Coord3DMode + + points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM, + Coord3DMode.LIDAR) + boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, + Coord3DMode.LIDAR) + + box_idx = super().points_in_boxes_batch(self, points_lidar, + boxes_lidar) + return box_idx diff --git a/mmdet3d/core/bbox/structures/coord_3d_mode.py b/mmdet3d/core/bbox/structures/coord_3d_mode.py index edd5f00d3..b42726461 100644 --- a/mmdet3d/core/bbox/structures/coord_3d_mode.py +++ b/mmdet3d/core/bbox/structures/coord_3d_mode.py @@ -2,12 +2,9 @@ import torch from enum import IntEnum, unique -from mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints, - LiDARPoints) +from ...points import BasePoints, CameraPoints, DepthPoints, LiDARPoints from .base_box3d import BaseInstance3DBoxes -from .cam_box3d import CameraInstance3DBoxes -from .depth_box3d import DepthInstance3DBoxes -from .lidar_box3d import LiDARInstance3DBoxes +from .box_3d_mode import Box3DMode @unique @@ -63,119 +60,73 @@ class Coord3DMode(IntEnum): DEPTH = 2 @staticmethod - def convert(input, src, dst, rt_mat=None): - """Convert boxes or points from `src` mode to `dst` mode.""" + def convert(input, src, dst, rt_mat=None, with_yaw=True, is_point=True): + """Convert boxes or points from `src` mode to `dst` mode. + + Args: + input (tuple | list | np.ndarray | torch.Tensor | + :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`): + Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. + src (:obj:`Box3DMode` | :obj:`Coord3DMode`): The source mode. + dst (:obj:`Box3DMode` | :obj:`Coord3DMode`): The target mode. + rt_mat (np.ndarray | torch.Tensor): The rotation and translation + matrix between different coordinates. Defaults to None. + The conversion from `src` coordinates to `dst` coordinates + usually comes along the change of sensors, e.g., from camera + to LiDAR. This requires a transformation matrix. + with_yaw (bool): If `box` is an instance of + :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle. + Defaults to True. + is_point (bool): If `input` is neither an instance of + :obj:`BaseInstance3DBoxes` nor an instance of + :obj:`BasePoints`, whether or not it is point data. + Defaults to True. + + Returns: + (tuple | list | np.ndarray | torch.Tensor | + :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`): + The converted box of the same type. + """ if isinstance(input, BaseInstance3DBoxes): - return Coord3DMode.convert_box(input, src, dst, rt_mat=rt_mat) + return Coord3DMode.convert_box( + input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw) elif isinstance(input, BasePoints): return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat) + elif isinstance(input, (tuple, list, np.ndarray, torch.Tensor)): + if is_point: + return Coord3DMode.convert_point( + input, src, dst, rt_mat=rt_mat) + else: + return Coord3DMode.convert_box( + input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw) else: raise NotImplementedError @staticmethod - def convert_box(box, src, dst, rt_mat=None): + def convert_box(box, src, dst, rt_mat=None, with_yaw=True): """Convert boxes from `src` mode to `dst` mode. Args: box (tuple | list | np.ndarray | - torch.Tensor | BaseInstance3DBoxes): + torch.Tensor | :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. - src (:obj:`CoordMode`): The src Box mode. - dst (:obj:`CoordMode`): The target Box mode. + src (:obj:`Box3DMode`): The src Box mode. + dst (:obj:`Box3DMode`): The target Box mode. 
rt_mat (np.ndarray | torch.Tensor): The rotation and translation matrix between different coordinates. Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. + with_yaw (bool): If `box` is an instance of + :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle. + Defaults to True. Returns: - (tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes): \ + (tuple | list | np.ndarray | torch.Tensor | + :obj:`BaseInstance3DBoxes`): The converted box of the same type. """ - if src == dst: - return box - - is_numpy = isinstance(box, np.ndarray) - is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes) - single_box = isinstance(box, (list, tuple)) - if single_box: - assert len(box) >= 7, ( - 'CoordMode.convert takes either a k-tuple/list or ' - 'an Nxk array/tensor, where k >= 7') - arr = torch.tensor(box)[None, :] - else: - # avoid modifying the input box - if is_numpy: - arr = torch.from_numpy(np.asarray(box)).clone() - elif is_Instance3DBoxes: - arr = box.tensor.clone() - else: - arr = box.clone() - - # convert box from `src` mode to `dst` mode. - x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6] - if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM: - if rt_mat is None: - rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) - xyz_size = torch.cat([y_size, z_size, x_size], dim=-1) - elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR: - if rt_mat is None: - rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]]) - xyz_size = torch.cat([z_size, x_size, y_size], dim=-1) - elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM: - if rt_mat is None: - rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) - xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) - elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH: - if rt_mat is None: - rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) - xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) - elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH: - if rt_mat is None: - rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]]) - xyz_size = torch.cat([y_size, x_size, z_size], dim=-1) - elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR: - if rt_mat is None: - rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]]) - xyz_size = torch.cat([y_size, x_size, z_size], dim=-1) - else: - raise NotImplementedError( - f'Conversion from Coord3DMode {src} to {dst} ' - 'is not supported yet') - - if not isinstance(rt_mat, torch.Tensor): - rt_mat = arr.new_tensor(rt_mat) - if rt_mat.size(1) == 4: - extended_xyz = torch.cat( - [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1) - xyz = extended_xyz @ rt_mat.t() - else: - xyz = arr[:, :3] @ rt_mat.t() - - remains = arr[..., 6:] - arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1) - - # convert arr to the original type - original_type = type(box) - if single_box: - return original_type(arr.flatten().tolist()) - if is_numpy: - return arr.numpy() - elif is_Instance3DBoxes: - if dst == Coord3DMode.CAM: - target_type = CameraInstance3DBoxes - elif dst == Coord3DMode.LIDAR: - target_type = LiDARInstance3DBoxes - elif dst == Coord3DMode.DEPTH: - target_type = DepthInstance3DBoxes - else: - raise NotImplementedError( - f'Conversion to {dst} through {original_type}' - ' is not supported yet') - return target_type( - arr, box_dim=arr.size(-1), with_yaw=box.with_yaw) - else: - return arr + 
return Box3DMode.convert(box, src, dst, rt_mat=rt_mat) @staticmethod def convert_point(point, src, dst, rt_mat=None): @@ -183,7 +134,7 @@ def convert_point(point, src, dst, rt_mat=None): Args: point (tuple | list | np.ndarray | - torch.Tensor | BasePoints): + torch.Tensor | :obj:`BasePoints`): Can be a k-tuple, k-list or an Nxk array/tensor. src (:obj:`CoordMode`): The src Point mode. dst (:obj:`CoordMode`): The target Point mode. @@ -194,7 +145,7 @@ def convert_point(point, src, dst, rt_mat=None): to LiDAR. This requires a transformation matrix. Returns: - (tuple | list | np.ndarray | torch.Tensor | BasePoints): \ + (tuple | list | np.ndarray | torch.Tensor | :obj:`BasePoints`): The converted point of the same type. """ if src == dst: @@ -218,8 +169,6 @@ def convert_point(point, src, dst, rt_mat=None): arr = point.clone() # convert point from `src` mode to `dst` mode. - # TODO: LIDAR - # only implemented provided Rt matrix in cam-depth conversion if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM: if rt_mat is None: rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) @@ -247,13 +196,13 @@ def convert_point(point, src, dst, rt_mat=None): rt_mat = arr.new_tensor(rt_mat) if rt_mat.size(1) == 4: extended_xyz = torch.cat( - [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1) + [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1) xyz = extended_xyz @ rt_mat.t() else: - xyz = arr[:, :3] @ rt_mat.t() + xyz = arr[..., :3] @ rt_mat.t() - remains = arr[:, 3:] - arr = torch.cat([xyz[:, :3], remains], dim=-1) + remains = arr[..., 3:] + arr = torch.cat([xyz[..., :3], remains], dim=-1) # convert arr to the original type original_type = type(point) diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py index 73ed25e84..d732f714f 100644 --- a/mmdet3d/core/bbox/structures/depth_box3d.py +++ b/mmdet3d/core/bbox/structures/depth_box3d.py @@ -2,7 +2,6 @@ import torch from mmdet3d.core.points import BasePoints -from mmdet3d.ops import points_in_boxes_batch from .base_box3d import BaseInstance3DBoxes from .utils import limit_period, rotation_3d_in_axis @@ -37,6 +36,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes): with_yaw (bool): If True, the value of yaw will be set to 0 as minmax boxes. """ + YAW_AXIS = 2 @property def gravity_center(self): @@ -66,7 +66,7 @@ def corners(self): / | / | (x0, y0, z1) + ----------- + + (x1, y1, z0) | / . | / - | / oriign | / + | / origin | / (x0, y0, z0) + ----------- + --------> right x (x1, y0, z0) """ @@ -84,7 +84,8 @@ def corners(self): corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3]) # rotate around z axis - corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2) + corners = rotation_3d_in_axis( + corners, self.tensor[:, 6], axis=self.YAW_AXIS) corners += self.tensor[:, :3].view(-1, 1, 3) return corners @@ -116,8 +117,8 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle or \ - rotation matrix. + """Rotate boxes with points (optional) with the given angle or rotation + matrix. Args: angle (float | torch.Tensor | np.ndarray): @@ -126,30 +127,31 @@ def rotate(self, angle, points=None): Points to rotate. Defaults to None. 
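For reference, a minimal usage sketch of the slimmed-down converter above (the values are illustrative; it assumes the default rt_mat shown in convert_point, and that plain tensors route through convert_point because is_point defaults to True):

import torch
from mmdet3d.core.bbox.structures.coord_3d_mode import Coord3DMode

# LiDAR frame: x forward, y left, z up.
pts_lidar = torch.tensor([[10.0, 2.0, -1.0]])
pts_cam = Coord3DMode.convert(pts_lidar, Coord3DMode.LIDAR, Coord3DMode.CAM)
# With the default rt_mat this yields (-y, -z, x) = (-2.0, 1.0, 10.0),
# i.e. the camera frame: x right, y down, z forward.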
Returns: - tuple or None: When ``points`` is None, the function returns \ - None, otherwise it returns the rotated points and the \ + tuple or None: When ``points`` is None, the function returns + None, otherwise it returns the rotated points and the rotation matrix ``rot_mat_T``. """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], - [rot_sin, rot_cos, 0], - [0, 0, 1]]).T + self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis( + self.tensor[:, 0:3], + angle, + axis=self.YAW_AXIS, + return_mat=True) else: - rot_mat_T = angle.T + rot_mat_T = angle rot_sin = rot_mat_T[0, 1] rot_cos = rot_mat_T[0, 0] angle = np.arctan2(rot_sin, rot_cos) + self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T - self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T if self.with_yaw: - self.tensor[:, 6] -= angle + self.tensor[:, 6] += angle else: corners_rot = self.corners @ rot_mat_T new_x_size = corners_rot[..., 0].max( @@ -164,11 +166,10 @@ def rotate(self, angle, points=None): if isinstance(points, torch.Tensor): points[:, :3] = points[:, :3] @ rot_mat_T elif isinstance(points, np.ndarray): - rot_mat_T = rot_mat_T.numpy() + rot_mat_T = rot_mat_T.cpu().numpy() points[:, :3] = np.dot(points[:, :3], rot_mat_T) elif isinstance(points, BasePoints): - # anti-clockwise - points.rotate(angle) + points.rotate(rot_mat_T) else: raise ValueError return points, rot_mat_T @@ -220,7 +221,7 @@ def in_range_bev(self, box_range): polygon, we try to reduce the burdun for simpler cases. Returns: - torch.Tensor: Indicating whether each box is inside \ + torch.Tensor: Indicating whether each box is inside the reference range. """ in_range_flags = ((self.tensor[:, 0] > box_range[0]) @@ -241,41 +242,13 @@ def convert_to(self, dst, rt_mat=None): to LiDAR. This requires a transformation matrix. Returns: - :obj:`DepthInstance3DBoxes`: \ + :obj:`DepthInstance3DBoxes`: The converted box of the same type in the ``dst`` mode. """ from .box_3d_mode import Box3DMode return Box3DMode.convert( box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat) - def points_in_boxes(self, points): - """Find points that are in boxes (CUDA). - - Args: - points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], \ - 3 dimensions are [x, y, z] in LiDAR coordinate. - - Returns: - torch.Tensor: The index of boxes each point lies in with shape \ - of (B, M, T). - """ - from .box_3d_mode import Box3DMode - - # to lidar - points_lidar = points.clone() - points_lidar = points_lidar[..., [1, 0, 2]] - points_lidar[..., 1] *= -1 - if points.dim() == 2: - points_lidar = points_lidar.unsqueeze(0) - else: - assert points.dim() == 3 and points_lidar.shape[0] == 1 - - boxes_lidar = self.convert_to(Box3DMode.LIDAR).tensor - boxes_lidar = boxes_lidar.to(points.device).unsqueeze(0) - box_idxs_of_pts = points_in_boxes_batch(points_lidar, boxes_lidar) - - return box_idxs_of_pts.squeeze(0) - def enlarged_box(self, extra_width): """Enlarge the length, width and height boxes. 
@@ -330,13 +303,12 @@ def get_surface_line_center(self): -1, 3) surface_rot = rot_mat_T.repeat(6, 1, 1) - surface_3d = torch.matmul( - surface_3d.unsqueeze(-2), surface_rot.transpose(2, 1)).squeeze(-2) + surface_3d = torch.matmul(surface_3d.unsqueeze(-2), + surface_rot).squeeze(-2) surface_center = center.repeat(1, 6, 1).reshape(-1, 3) + surface_3d line_rot = rot_mat_T.repeat(12, 1, 1) - line_3d = torch.matmul( - line_3d.unsqueeze(-2), line_rot.transpose(2, 1)).squeeze(-2) + line_3d = torch.matmul(line_3d.unsqueeze(-2), line_rot).squeeze(-2) line_center = center.repeat(1, 12, 1).reshape(-1, 3) + line_3d return surface_center, line_center diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py index 2150d61e2..6adfa3ad0 100644 --- a/mmdet3d/core/bbox/structures/lidar_box3d.py +++ b/mmdet3d/core/bbox/structures/lidar_box3d.py @@ -2,7 +2,6 @@ import torch from mmdet3d.core.points import BasePoints -from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu from .base_box3d import BaseInstance3DBoxes from .utils import limit_period, rotation_3d_in_axis @@ -14,16 +13,16 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): .. code-block:: none - up z x front (yaw=-0.5*pi) - ^ ^ - | / - | / - (yaw=-pi) left y <------ 0 -------- (yaw=0) + up z x front (yaw=0) + ^ ^ + | / + | / + (yaw=0.5*pi) left y <------ 0 The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0), and the yaw is around the z axis, thus the rotation axis=2. - The yaw is 0 at the negative direction of y axis, and decreases from - the negative direction of y to the positive direction of x. + The yaw is 0 at the positive direction of x axis, and increases from + the positive direction of x to the positive direction of y. A refactor is ongoing to make the three coordinate systems easier to understand and convert between each other. @@ -35,6 +34,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): with_yaw (bool): If True, the value of yaw will be set to 0 as minmax boxes. """ + YAW_AXIS = 2 @property def gravity_center(self): @@ -64,7 +64,7 @@ def corners(self): / | / | (x0, y0, z1) + ----------- + + (x1, y1, z0) | / . | / - | / oriign | / + | / origin | / left y<-------- + ----------- + (x0, y1, z0) (x0, y0, z0) """ @@ -82,7 +82,8 @@ def corners(self): corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3]) # rotate around z axis - corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2) + corners = rotation_3d_in_axis( + corners, self.tensor[:, 6], axis=self.YAW_AXIS) corners += self.tensor[:, :3].view(-1, 1, 3) return corners @@ -114,8 +115,8 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle or \ - rotation matrix. + """Rotate boxes with points (optional) with the given angle or rotation + matrix. Args: angles (float | torch.Tensor | np.ndarray): @@ -124,28 +125,29 @@ def rotate(self, angle, points=None): Points to rotate. Defaults to None. Returns: - tuple or None: When ``points`` is None, the function returns \ - None, otherwise it returns the rotated points and the \ + tuple or None: When ``points`` is None, the function returns + None, otherwise it returns the rotated points and the rotation matrix ``rot_mat_T``. 
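A small sketch of the new LiDAR yaw convention described above (box values are made up; it assumes LiDARInstance3DBoxes keeps its usual re-export from mmdet3d.core.bbox):

import numpy as np
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# One box as (x, y, z, dx, dy, dz, yaw); yaw = 0 means heading along +x.
boxes = LiDARInstance3DBoxes(torch.tensor([[0., 0., 0., 4., 2., 1.5, 0.]]))
boxes.rotate(np.pi / 2)   # counter-clockwise in BEV when viewed from above
# boxes.yaw is now ~pi/2: the heading has turned from +x to +y, matching
# "increases from the positive direction of x to the positive direction of y".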
""" if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], - [rot_sin, rot_cos, 0], - [0, 0, 1]]) + self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis( + self.tensor[:, 0:3], + angle, + axis=self.YAW_AXIS, + return_mat=True) else: rot_mat_T = angle - rot_sin = rot_mat_T[1, 0] + rot_sin = rot_mat_T[0, 1] rot_cos = rot_mat_T[0, 0] angle = np.arctan2(rot_sin, rot_cos) + self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T - self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, 6] += angle if self.tensor.shape[1] == 9: @@ -156,11 +158,10 @@ def rotate(self, angle, points=None): if isinstance(points, torch.Tensor): points[:, :3] = points[:, :3] @ rot_mat_T elif isinstance(points, np.ndarray): - rot_mat_T = rot_mat_T.numpy() + rot_mat_T = rot_mat_T.cpu().numpy() points[:, :3] = np.dot(points[:, :3], rot_mat_T) elif isinstance(points, BasePoints): - # clockwise - points.rotate(-angle) + points.rotate(rot_mat_T) else: raise ValueError return points, rot_mat_T @@ -182,11 +183,11 @@ def flip(self, bev_direction='horizontal', points=None): if bev_direction == 'horizontal': self.tensor[:, 1::7] = -self.tensor[:, 1::7] if self.with_yaw: - self.tensor[:, 6] = -self.tensor[:, 6] + np.pi + self.tensor[:, 6] = -self.tensor[:, 6] elif bev_direction == 'vertical': self.tensor[:, 0::7] = -self.tensor[:, 0::7] if self.with_yaw: - self.tensor[:, 6] = -self.tensor[:, 6] + self.tensor[:, 6] = -self.tensor[:, 6] + np.pi if points is not None: assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints)) @@ -232,7 +233,7 @@ def convert_to(self, dst, rt_mat=None): to LiDAR. This requires a transformation matrix. Returns: - :obj:`BaseInstance3DBoxes`: \ + :obj:`BaseInstance3DBoxes`: The converted box of the same type in the ``dst`` mode. """ from .box_3d_mode import Box3DMode @@ -253,17 +254,3 @@ def enlarged_box(self, extra_width): # bottom center z minus extra_width enlarged_boxes[:, 2] -= extra_width return self.new_box(enlarged_boxes) - - def points_in_boxes(self, points): - """Find the box which the points are in. - - Args: - points (torch.Tensor): Points in shape (N, 3). - - Returns: - torch.Tensor: The index of box where each point are in. - """ - box_idx = points_in_boxes_gpu( - points.unsqueeze(0), - self.tensor.unsqueeze(0).to(points.device)).squeeze(0) - return box_idx diff --git a/mmdet3d/core/bbox/structures/utils.py b/mmdet3d/core/bbox/structures/utils.py index 8870cb426..02b35b7d1 100644 --- a/mmdet3d/core/bbox/structures/utils.py +++ b/mmdet3d/core/bbox/structures/utils.py @@ -1,84 +1,138 @@ import numpy as np import torch +from mmdet3d.core.utils import array_converter + +@array_converter(apply_to=('val', )) def limit_period(val, offset=0.5, period=np.pi): """Limit the value into a period for periodic function. Args: - val (torch.Tensor): The value to be converted. - offset (float, optional): Offset to set the value range. \ + val (torch.Tensor | np.ndarray): The value to be converted. + offset (float, optional): Offset to set the value range. Defaults to 0.5. period ([type], optional): Period of the value. Defaults to np.pi. 
Returns: - torch.Tensor: Value in the range of \ + (torch.Tensor | np.ndarray): Value in the range of [-offset * period, (1-offset) * period] """ - return val - torch.floor(val / period + offset) * period + limited_val = val - torch.floor(val / period + offset) * period + return limited_val -def rotation_3d_in_axis(points, angles, axis=0): +@array_converter(apply_to=('points', 'angles')) +def rotation_3d_in_axis(points, + angles, + axis=0, + return_mat=False, + clockwise=False): """Rotate points by angles according to axis. Args: - points (torch.Tensor): Points of shape (N, M, 3). - angles (torch.Tensor): Vector of angles in shape (N,) + points (np.ndarray | torch.Tensor | list | tuple ): + Points of shape (N, M, 3). + angles (np.ndarray | torch.Tensor | list | tuple | float): + Vector of angles in shape (N,) axis (int, optional): The axis to be rotated. Defaults to 0. + return_mat: Whether or not return the rotation matrix (transposed). + Defaults to False. + clockwise: Whether the rotation is clockwise. Defaults to False. Raises: - ValueError: when the axis is not in range [0, 1, 2], it will \ + ValueError: when the axis is not in range [0, 1, 2], it will raise value error. Returns: - torch.Tensor: Rotated points in shape (N, M, 3) + (torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3). """ + batch_free = len(points.shape) == 2 + if batch_free: + points = points[None] + + if isinstance(angles, float) or len(angles.shape) == 0: + angles = torch.full(points.shape[:1], angles) + + assert len(points.shape) == 3 and len(angles.shape) == 1 \ + and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \ + f'angles: {points.shape}, {angles.shape}' + + assert points.shape[-1] in [2, 3], \ + f'Points size should be 2 or 3 instead of {points.shape[-1]}' + rot_sin = torch.sin(angles) rot_cos = torch.cos(angles) ones = torch.ones_like(rot_cos) zeros = torch.zeros_like(rot_cos) - if axis == 1: - rot_mat_T = torch.stack([ - torch.stack([rot_cos, zeros, -rot_sin]), - torch.stack([zeros, ones, zeros]), - torch.stack([rot_sin, zeros, rot_cos]) - ]) - elif axis == 2 or axis == -1: - rot_mat_T = torch.stack([ - torch.stack([rot_cos, -rot_sin, zeros]), - torch.stack([rot_sin, rot_cos, zeros]), - torch.stack([zeros, zeros, ones]) - ]) - elif axis == 0: + + if points.shape[-1] == 3: + if axis == 1 or axis == -2: + rot_mat_T = torch.stack([ + torch.stack([rot_cos, zeros, rot_sin]), + torch.stack([zeros, ones, zeros]), + torch.stack([-rot_sin, zeros, rot_cos]) + ]) + elif axis == 2 or axis == -1: + rot_mat_T = torch.stack([ + torch.stack([rot_cos, rot_sin, zeros]), + torch.stack([-rot_sin, rot_cos, zeros]), + torch.stack([zeros, zeros, ones]) + ]) + elif axis == 0 or axis == -3: + rot_mat_T = torch.stack([ + torch.stack([ones, zeros, zeros]), + torch.stack([zeros, rot_cos, rot_sin]), + torch.stack([zeros, -rot_sin, rot_cos]) + ]) + else: + raise ValueError(f'axis should in range ' + f'[-3, -2, -1, 0, 1, 2], got {axis}') + else: rot_mat_T = torch.stack([ - torch.stack([zeros, rot_cos, -rot_sin]), - torch.stack([zeros, rot_sin, rot_cos]), - torch.stack([ones, zeros, zeros]) + torch.stack([rot_cos, rot_sin]), + torch.stack([-rot_sin, rot_cos]) ]) + + if clockwise: + rot_mat_T = rot_mat_T.transpose(0, 1) + + if points.shape[0] == 0: + points_new = points else: - raise ValueError(f'axis should in range [0, 1, 2], got {axis}') + points_new = torch.einsum('aij,jka->aik', points, rot_mat_T) + + if batch_free: + points_new = points_new.squeeze(0) - return torch.einsum('aij,jka->aik', (points, 
rot_mat_T)) + if return_mat: + rot_mat_T = torch.einsum('jka->ajk', rot_mat_T) + if batch_free: + rot_mat_T = rot_mat_T.squeeze(0) + return points_new, rot_mat_T + else: + return points_new +@array_converter(apply_to=('boxes_xywhr', )) def xywhr2xyxyr(boxes_xywhr): """Convert a rotated boxes in XYWHR format to XYXYR format. Args: - boxes_xywhr (torch.Tensor): Rotated boxes in XYWHR format. + boxes_xywhr (torch.Tensor | np.ndarray): Rotated boxes in XYWHR format. Returns: - torch.Tensor: Converted boxes in XYXYR format. + (torch.Tensor | np.ndarray): Converted boxes in XYXYR format. """ boxes = torch.zeros_like(boxes_xywhr) - half_w = boxes_xywhr[:, 2] / 2 - half_h = boxes_xywhr[:, 3] / 2 - - boxes[:, 0] = boxes_xywhr[:, 0] - half_w - boxes[:, 1] = boxes_xywhr[:, 1] - half_h - boxes[:, 2] = boxes_xywhr[:, 0] + half_w - boxes[:, 3] = boxes_xywhr[:, 1] + half_h - boxes[:, 4] = boxes_xywhr[:, 4] + half_w = boxes_xywhr[..., 2] / 2 + half_h = boxes_xywhr[..., 3] / 2 + + boxes[..., 0] = boxes_xywhr[..., 0] - half_w + boxes[..., 1] = boxes_xywhr[..., 1] - half_h + boxes[..., 2] = boxes_xywhr[..., 0] + half_w + boxes[..., 3] = boxes_xywhr[..., 1] + half_h + boxes[..., 4] = boxes_xywhr[..., 4] return boxes @@ -89,6 +143,10 @@ def get_box_type(box_type): box_type (str): The type of box structure. The valid value are "LiDAR", "Camera", or "Depth". + Raises: + ValueError: A ValueError is raised when `box_type` + does not belong to the three valid types. + Returns: tuple: Box type and box mode. """ @@ -111,21 +169,24 @@ def get_box_type(box_type): return box_type_3d, box_mode_3d +@array_converter(apply_to=('points_3d', 'proj_mat')) def points_cam2img(points_3d, proj_mat, with_depth=False): - """Project points from camera coordicates to image coordinates. + """Project points in camera coordinates to image coordinates. Args: - points_3d (torch.Tensor): Points in shape (N, 3). - proj_mat (torch.Tensor): Transformation matrix between coordinates. + points_3d (torch.Tensor | np.ndarray): Points in shape (N, 3) + proj_mat (torch.Tensor | np.ndarray): + Transformation matrix between coordinates. with_depth (bool, optional): Whether to keep depth in the output. Defaults to False. Returns: - torch.Tensor: Points in image coordinates with shape [N, 2]. + (torch.Tensor | np.ndarray): Points in image coordinates, + with shape [N, 2] if `with_depth=False`, else [N, 3]. """ - points_num = list(points_3d.shape)[:-1] + points_shape = list(points_3d.shape) + points_shape[-1] = 1 - points_shape = np.concatenate([points_num, [1]], axis=0).tolist() assert len(proj_mat.shape) == 2, 'The dimension of the projection'\ f' matrix should be 2 instead of {len(proj_mat.shape)}.' 
d1, d2 = proj_mat.shape[:2] @@ -138,14 +199,15 @@ def points_cam2img(points_3d, proj_mat, with_depth=False): proj_mat_expanded[:d1, :d2] = proj_mat proj_mat = proj_mat_expanded - # previous implementation use new_zeros, new_one yeilds better results - points_4 = torch.cat( - [points_3d, points_3d.new_ones(*points_shape)], dim=-1) - point_2d = torch.matmul(points_4, proj_mat.t()) + # previous implementation use new_zeros, new_one yields better results + points_4 = torch.cat([points_3d, points_3d.new_ones(points_shape)], dim=-1) + + point_2d = points_4 @ proj_mat.T point_2d_res = point_2d[..., :2] / point_2d[..., 2:3] if with_depth: - return torch.cat([point_2d_res, point_2d[..., 2:3]], dim=-1) + point_2d_res = torch.cat([point_2d_res, point_2d[..., 2:3]], dim=-1) + return point_2d_res @@ -160,9 +222,9 @@ def mono_cam_box2vis(cam_box): After applying this function, we can project and draw it on 2D images. Args: - cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate \ - system before conversion. Could be gt bbox loaded from dataset or \ - network prediction output. + cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate + system before conversion. Could be gt bbox loaded from dataset + or network prediction output. Returns: :obj:`CameraInstance3DBoxes`: Box after conversion. diff --git a/mmdet3d/core/bbox/transforms.py b/mmdet3d/core/bbox/transforms.py index 686618e22..e5f5778b6 100644 --- a/mmdet3d/core/bbox/transforms.py +++ b/mmdet3d/core/bbox/transforms.py @@ -31,7 +31,7 @@ def bbox3d2roi(bbox_list): corresponding to a batch of images. Returns: - torch.Tensor: Region of interests in shape (n, c), where \ + torch.Tensor: Region of interests in shape (n, c), where the channels are in order of [batch_ind, x, y ...]. """ rois_list = [] @@ -53,7 +53,7 @@ def bbox3d2result(bboxes, scores, labels, attrs=None): bboxes (torch.Tensor): Bounding boxes with shape of (n, 5). labels (torch.Tensor): Labels with shape of (n, ). scores (torch.Tensor): Scores with shape of (n, ). - attrs (torch.Tensor, optional): Attributes with shape of (n, ). \ + attrs (torch.Tensor, optional): Attributes with shape of (n, ). Defaults to None. Returns: diff --git a/mmdet3d/core/points/base_points.py b/mmdet3d/core/points/base_points.py index 8e82dd9c6..2a572ff99 100644 --- a/mmdet3d/core/points/base_points.py +++ b/mmdet3d/core/points/base_points.py @@ -3,6 +3,8 @@ import warnings from abc import abstractmethod +from ..bbox.structures.utils import rotation_3d_in_axis + class BasePoints(object): """Base class for Points. @@ -140,7 +142,7 @@ def rotate(self, rotation, axis=None): """Rotate points with the given rotation matrix or angle. Args: - rotation (float, np.ndarray, torch.Tensor): Rotation matrix + rotation (float | np.ndarray | torch.Tensor): Rotation matrix or angle. axis (int): Axis to rotate at. Defaults to None. 
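A quick usage sketch of the rewritten points_cam2img (the intrinsic values below are made up; the import assumes the helper stays in mmdet3d/core/bbox/structures/utils.py):

import torch
from mmdet3d.core.bbox.structures.utils import points_cam2img

K = torch.tensor([[721.5, 0.0, 609.6, 0.0],
                  [0.0, 721.5, 172.9, 0.0],
                  [0.0, 0.0, 1.0, 0.0]])            # 3x4 projection matrix
pts_cam = torch.tensor([[2.0, 1.0, 10.0]])          # x right, y down, z forward
uv = points_cam2img(pts_cam, K)                     # (1, 2) pixel coordinates
uvd = points_cam2img(pts_cam, K, with_depth=True)   # (1, 3), keeps the depth z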
""" @@ -153,28 +155,14 @@ def rotate(self, rotation, axis=None): axis = self.rotation_axis if rotation.numel() == 1: - rot_sin = torch.sin(rotation) - rot_cos = torch.cos(rotation) - if axis == 1: - rot_mat_T = rotation.new_tensor([[rot_cos, 0, -rot_sin], - [0, 1, 0], - [rot_sin, 0, rot_cos]]) - elif axis == 2 or axis == -1: - rot_mat_T = rotation.new_tensor([[rot_cos, -rot_sin, 0], - [rot_sin, rot_cos, 0], - [0, 0, 1]]) - elif axis == 0: - rot_mat_T = rotation.new_tensor([[0, rot_cos, -rot_sin], - [0, rot_sin, rot_cos], - [1, 0, 0]]) - else: - raise ValueError('axis should in range') - rot_mat_T = rot_mat_T.T - elif rotation.numel() == 9: - rot_mat_T = rotation + rotated_points, rot_mat_T = rotation_3d_in_axis( + self.tensor[:, :3][None], rotation, axis=axis, return_mat=True) + self.tensor[:, :3] = rotated_points.squeeze(0) + rot_mat_T = rot_mat_T.squeeze(0) else: - raise NotImplementedError - self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T + # rotation.numel() == 9 + self.tensor[:, :3] = self.tensor[:, :3] @ rotation + rot_mat_T = rotation return rot_mat_T diff --git a/mmdet3d/core/utils/__init__.py b/mmdet3d/core/utils/__init__.py index ad936667b..7aa874543 100644 --- a/mmdet3d/core/utils/__init__.py +++ b/mmdet3d/core/utils/__init__.py @@ -1,3 +1,7 @@ +from .array_converter import ArrayConverter, array_converter from .gaussian import draw_heatmap_gaussian, gaussian_2d, gaussian_radius -__all__ = ['gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian'] +__all__ = [ + 'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian', + 'ArrayConverter', 'array_converter' +] diff --git a/mmdet3d/core/utils/array_converter.py b/mmdet3d/core/utils/array_converter.py new file mode 100644 index 000000000..eeb369997 --- /dev/null +++ b/mmdet3d/core/utils/array_converter.py @@ -0,0 +1,320 @@ +import functools +import numpy as np +import torch +from inspect import getfullargspec + + +def array_converter(to_torch=True, + apply_to=tuple(), + template_arg_name_=None, + recover=True): + """Wrapper function for data-type agnostic processing. + + First converts input arrays to PyTorch tensors or NumPy ndarrays + for middle calculation, then convert output to original data-type. + + Args: + to_torch (Bool): Whether convert to PyTorch tensors + for middle calculation. Defaults to True. + apply_to (tuple[str]): The arguments to which we apply data-type + conversion. Defaults to an empty tuple. + template_arg_name_ (str): Argument serving as the template ( + return arrays should have the same dtype and device + as the template). Defaults to None. If None, we will use the + first argument in `apply_to` as the template argument. + recover (Bool): Whether or not recover the wrapped function outputs + to the `template_arg_name_` type. Defaults to True. + + Raises: + ValueError: When template_arg_name_ is not among all args, or + when apply_to contains an arg which is not among all args, + a ValueError will be raised. When the template argument or + an argument to convert is a list or tuple, and cannot be + converted to a NumPy array, a ValueError will be raised. + TypeError: When the type of the template argument or + an argument to convert does not belong to the above range, + or the contents of such an list-or-tuple-type argument + do not share the same data type, a TypeError is raised. + + Returns: + (function): wrapped function. 
+ + Example: + >>> import torch + >>> import numpy as np + >>> + >>> # Use torch addition for a + b, + >>> # and convert return values to the type of a + >>> @array_converter(apply_to=('a', 'b')) + >>> def simple_add(a, b): + >>> return a + b + >>> + >>> a = np.array([1.1]) + >>> b = np.array([2.2]) + >>> simple_add(a, b) + >>> + >>> # Use numpy addition for a + b, + >>> # and convert return values to the type of b + >>> @array_converter(to_torch=False, apply_to=('a', 'b'), + >>> template_arg_name_='b') + >>> def simple_add(a, b): + >>> return a + b + >>> + >>> simple_add() + >>> + >>> # Use torch funcs for floor(a) if flag=True else ceil(a), + >>> # and return the torch tensor + >>> @array_converter(apply_to=('a',), recover=False) + >>> def floor_or_ceil(a, flag=True): + >>> return torch.floor(a) if flag else torch.ceil(a) + >>> + >>> floor_or_ceil(a, flag=False) + """ + + def array_converter_wrapper(func): + """Outer wrapper for the function.""" + + @functools.wraps(func) + def new_func(*args, **kwargs): + """Inner wrapper for the arguments.""" + if len(apply_to) == 0: + return func(*args, **kwargs) + + func_name = func.__name__ + + arg_spec = getfullargspec(func) + + arg_names = arg_spec.args + arg_num = len(arg_names) + default_arg_values = arg_spec.defaults + if default_arg_values is None: + default_arg_values = [] + no_default_arg_num = len(arg_names) - len(default_arg_values) + + kwonly_arg_names = arg_spec.kwonlyargs + kwonly_default_arg_values = arg_spec.kwonlydefaults + if kwonly_default_arg_values is None: + kwonly_default_arg_values = {} + + all_arg_names = arg_names + kwonly_arg_names + + # in case there are args in the form of *args + if len(args) > arg_num: + named_args = args[:arg_num] + nameless_args = args[arg_num:] + else: + named_args = args + nameless_args = [] + + # template argument data type is used for all array-like arguments + if template_arg_name_ is None: + template_arg_name = apply_to[0] + else: + template_arg_name = template_arg_name_ + + if template_arg_name not in all_arg_names: + raise ValueError(f'{template_arg_name} is not among the ' + f'argument list of function {func_name}') + + # inspect apply_to + for arg_to_apply in apply_to: + if arg_to_apply not in all_arg_names: + raise ValueError(f'{arg_to_apply} is not ' + f'an argument of {func_name}') + + new_args = [] + new_kwargs = {} + + converter = ArrayConverter() + target_type = torch.Tensor if to_torch else np.ndarray + + # non-keyword arguments + for i, arg_value in enumerate(named_args): + if arg_names[i] in apply_to: + new_args.append( + converter.convert( + input_array=arg_value, target_type=target_type)) + else: + new_args.append(arg_value) + + if arg_names[i] == template_arg_name: + template_arg_value = arg_value + + kwonly_default_arg_values.update(kwargs) + kwargs = kwonly_default_arg_values + + # keyword arguments and non-keyword arguments using default value + for i in range(len(named_args), len(all_arg_names)): + arg_name = all_arg_names[i] + if arg_name in kwargs: + if arg_name in apply_to: + new_kwargs[arg_name] = converter.convert( + input_array=kwargs[arg_name], + target_type=target_type) + else: + new_kwargs[arg_name] = kwargs[arg_name] + else: + default_value = default_arg_values[i - no_default_arg_num] + if arg_name in apply_to: + new_kwargs[arg_name] = converter.convert( + input_array=default_value, target_type=target_type) + else: + new_kwargs[arg_name] = default_value + if arg_name == template_arg_name: + template_arg_value = kwargs[arg_name] + + # add nameless args provided by 
*args (if exists) + new_args += nameless_args + + return_values = func(*new_args, **new_kwargs) + converter.set_template(template_arg_value) + + def recursive_recover(input_data): + if isinstance(input_data, (tuple, list)): + new_data = [] + for item in input_data: + new_data.append(recursive_recover(item)) + return tuple(new_data) if isinstance(input_data, + tuple) else new_data + elif isinstance(input_data, dict): + new_data = {} + for k, v in input_data.items(): + new_data[k] = recursive_recover(v) + return new_data + elif isinstance(input_data, (torch.Tensor, np.ndarray)): + return converter.recover(input_data) + else: + return input_data + + if recover: + return recursive_recover(return_values) + else: + return return_values + + return new_func + + return array_converter_wrapper + + +class ArrayConverter: + + SUPPORTED_NON_ARRAY_TYPES = (int, float, np.int8, np.int16, np.int32, + np.int64, np.uint8, np.uint16, np.uint32, + np.uint64, np.float16, np.float32, np.float64) + + def __init__(self, template_array=None): + if template_array is not None: + self.set_template(template_array) + + def set_template(self, array): + """Set template array. + + Args: + array (tuple | list | int | float | np.ndarray | torch.Tensor): + Template array. + + Raises: + ValueError: If input is list or tuple and cannot be converted to + to a NumPy array, a ValueError is raised. + TypeError: If input type does not belong to the above range, + or the contents of a list or tuple do not share the + same data type, a TypeError is raised. + """ + self.array_type = type(array) + self.is_num = False + self.device = 'cpu' + + if isinstance(array, np.ndarray): + self.dtype = array.dtype + elif isinstance(array, torch.Tensor): + self.dtype = array.dtype + self.device = array.device + elif isinstance(array, (list, tuple)): + try: + array = np.array(array) + if array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES: + raise TypeError + self.dtype = array.dtype + except (ValueError, TypeError): + print(f'The following list cannot be converted to' + f' a numpy array of supported dtype:\n{array}') + raise + elif isinstance(array, self.SUPPORTED_NON_ARRAY_TYPES): + self.array_type = np.ndarray + self.is_num = True + self.dtype = np.dtype(type(array)) + else: + raise TypeError(f'Template type {self.array_type}' + f' is not supported.') + + def convert(self, input_array, target_type=None, target_array=None): + """Convert input array to target data type. + + Args: + input_array (tuple | list | np.ndarray | + torch.Tensor | int | float ): + Input array. Defaults to None. + target_type ( | ): + Type to which input array is converted. Defaults to None. + target_array (np.ndarray | torch.Tensor): + Template array to which input array is converted. + Defaults to None. + + Raises: + ValueError: If input is list or tuple and cannot be converted to + to a NumPy array, a ValueError is raised. + TypeError: If input type does not belong to the above range, + or the contents of a list or tuple do not share the + same data type, a TypeError is raised. 
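ArrayConverter can also be used on its own; a minimal round-trip sketch (the import relies on the export added to mmdet3d/core/utils/__init__.py above):

import numpy as np
import torch
from mmdet3d.core.utils import ArrayConverter

converter = ArrayConverter(template_array=np.array([0.0], dtype=np.float32))
as_tensor = converter.convert(np.array([1.0, 2.0]), target_type=torch.Tensor)
restored = converter.recover(as_tensor)   # back to np.ndarray, dtype float32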
+ """ + if isinstance(input_array, (list, tuple)): + try: + input_array = np.array(input_array) + if input_array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES: + raise TypeError + except (ValueError, TypeError): + print(f'The input cannot be converted to' + f' a single-type numpy array:\n{input_array}') + raise + elif isinstance(input_array, self.SUPPORTED_NON_ARRAY_TYPES): + input_array = np.array(input_array) + array_type = type(input_array) + assert target_type is not None or target_array is not None, \ + 'must specify a target' + if target_type is not None: + assert target_type in (np.ndarray, torch.Tensor), \ + 'invalid target type' + if target_type == array_type: + return input_array + elif target_type == np.ndarray: + # default dtype is float32 + converted_array = input_array.cpu().numpy().astype(np.float32) + else: + # default dtype is float32, device is 'cpu' + converted_array = torch.tensor( + input_array, dtype=torch.float32) + else: + assert isinstance(target_array, (np.ndarray, torch.Tensor)), \ + 'invalid target array type' + if isinstance(target_array, array_type): + return input_array + elif isinstance(target_array, np.ndarray): + converted_array = input_array.cpu().numpy().astype( + target_array.dtype) + else: + converted_array = target_array.new_tensor(input_array) + return converted_array + + def recover(self, input_array): + assert isinstance(input_array, (np.ndarray, torch.Tensor)), \ + 'invalid input array type' + if isinstance(input_array, self.array_type): + return input_array + elif isinstance(input_array, torch.Tensor): + converted_array = input_array.cpu().numpy().astype(self.dtype) + else: + converted_array = torch.tensor( + input_array, dtype=self.dtype, device=self.device) + if self.is_num: + converted_array = converted_array.item() + return converted_array diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py index 1f6346158..8967f32e2 100644 --- a/mmdet3d/core/visualizer/show_result.py +++ b/mmdet3d/core/visualizer/show_result.py @@ -110,16 +110,14 @@ def show_result(points, if gt_bboxes is not None: # bottom center to gravity center gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2 - # the positive direction for yaw in meshlab is clockwise - gt_bboxes[:, 6] *= -1 + _write_oriented_bbox(gt_bboxes, osp.join(result_path, f'{filename}_gt.obj')) if pred_bboxes is not None: # bottom center to gravity center pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2 - # the positive direction for yaw in meshlab is clockwise - pred_bboxes[:, 6] *= -1 + _write_oriented_bbox(pred_bboxes, osp.join(result_path, f'{filename}_pred.obj')) diff --git a/mmdet3d/datasets/kitti_dataset.py b/mmdet3d/datasets/kitti_dataset.py index b6fd26a72..23dc207f8 100644 --- a/mmdet3d/datasets/kitti_dataset.py +++ b/mmdet3d/datasets/kitti_dataset.py @@ -616,8 +616,6 @@ def convert_valid_bboxes(self, box_dict, info): scores = box_dict['scores_3d'] labels = box_dict['labels_3d'] sample_idx = info['image']['image_idx'] - # TODO: remove the hack of yaw - box_preds.tensor[:, -1] = box_preds.tensor[:, -1] - np.pi box_preds.limit_yaw(offset=0.5, period=np.pi * 2) if len(box_preds) == 0: diff --git a/mmdet3d/datasets/lyft_dataset.py b/mmdet3d/datasets/lyft_dataset.py index a6d55632c..d701fc6a9 100644 --- a/mmdet3d/datasets/lyft_dataset.py +++ b/mmdet3d/datasets/lyft_dataset.py @@ -516,16 +516,16 @@ def output_to_lyft_box(detection): box_gravity_center = box3d.gravity_center.numpy() box_dims = box3d.dims.numpy() box_yaw = box3d.yaw.numpy() - # TODO: check whether this is necessary 
- # with dir_offset & dir_limit in the head - box_yaw = -box_yaw - np.pi / 2 + + # our LiDAR coordinate system -> Lyft box coordinate system + lyft_box_dims = box_dims[:, [1, 0, 2]] box_list = [] for i in range(len(box3d)): quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i]) box = LyftBox( box_gravity_center[i], - box_dims[i], + lyft_box_dims[i], quat, label=labels[i], score=scores[i]) diff --git a/mmdet3d/datasets/nuscenes_dataset.py b/mmdet3d/datasets/nuscenes_dataset.py index a7d4b0637..4d07a576f 100644 --- a/mmdet3d/datasets/nuscenes_dataset.py +++ b/mmdet3d/datasets/nuscenes_dataset.py @@ -587,9 +587,9 @@ def output_to_nusc_box(detection): box_gravity_center = box3d.gravity_center.numpy() box_dims = box3d.dims.numpy() box_yaw = box3d.yaw.numpy() - # TODO: check whether this is necessary - # with dir_offset & dir_limit in the head - box_yaw = -box_yaw - np.pi / 2 + + # our LiDAR coordinate system -> nuScenes box coordinate system + nus_box_dims = box_dims[:, [1, 0, 2]] box_list = [] for i in range(len(box3d)): @@ -601,7 +601,7 @@ def output_to_nusc_box(detection): # velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0) box = NuScenesBox( box_gravity_center[i], - box_dims[i], + nus_box_dims[i], quat, label=labels[i], score=scores[i], diff --git a/mmdet3d/datasets/pipelines/data_augment_utils.py b/mmdet3d/datasets/pipelines/data_augment_utils.py index e0bc7165a..2cb392504 100644 --- a/mmdet3d/datasets/pipelines/data_augment_utils.py +++ b/mmdet3d/datasets/pipelines/data_augment_utils.py @@ -20,8 +20,8 @@ def _rotation_box2d_jit_(corners, angle, rot_mat_T): rot_sin = np.sin(angle) rot_cos = np.cos(angle) rot_mat_T[0, 0] = rot_cos - rot_mat_T[0, 1] = -rot_sin - rot_mat_T[1, 0] = rot_sin + rot_mat_T[0, 1] = rot_sin + rot_mat_T[1, 0] = -rot_sin rot_mat_T[1, 1] = rot_cos corners[:] = corners @ rot_mat_T @@ -210,8 +210,8 @@ def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises, rot_sin = np.sin(current_box[0, -1]) rot_cos = np.cos(current_box[0, -1]) rot_mat_T[0, 0] = rot_cos - rot_mat_T[0, 1] = -rot_sin - rot_mat_T[1, 0] = rot_sin + rot_mat_T[0, 1] = rot_sin + rot_mat_T[1, 0] = -rot_sin rot_mat_T[1, 1] = rot_cos current_corners[:] = current_box[ 0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2] @@ -263,18 +263,18 @@ def _rotation_matrix_3d_(rot_mat_T, angle, axis): rot_mat_T[:] = np.eye(3) if axis == 1: rot_mat_T[0, 0] = rot_cos - rot_mat_T[0, 2] = -rot_sin - rot_mat_T[2, 0] = rot_sin + rot_mat_T[0, 2] = rot_sin + rot_mat_T[2, 0] = -rot_sin rot_mat_T[2, 2] = rot_cos elif axis == 2 or axis == -1: rot_mat_T[0, 0] = rot_cos - rot_mat_T[0, 1] = -rot_sin - rot_mat_T[1, 0] = rot_sin + rot_mat_T[0, 1] = rot_sin + rot_mat_T[1, 0] = -rot_sin rot_mat_T[1, 1] = rot_cos elif axis == 0: rot_mat_T[1, 1] = rot_cos - rot_mat_T[1, 2] = -rot_sin - rot_mat_T[2, 1] = rot_sin + rot_mat_T[1, 2] = rot_sin + rot_mat_T[2, 1] = -rot_sin rot_mat_T[2, 2] = rot_cos diff --git a/mmdet3d/datasets/pipelines/formating.py b/mmdet3d/datasets/pipelines/formating.py index dd8a219bb..f75dc679a 100644 --- a/mmdet3d/datasets/pipelines/formating.py +++ b/mmdet3d/datasets/pipelines/formating.py @@ -140,7 +140,8 @@ def __init__(self, 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', - 'pts_filename', 'transformation_3d_flow')): + 'pcd_rotation_angle', 'pts_filename', + 'transformation_3d_flow')): self.keys = keys self.meta_keys = meta_keys diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py 
b/mmdet3d/datasets/pipelines/transforms_3d.py index f37fb56f2..39ea505ec 100644 --- a/mmdet3d/datasets/pipelines/transforms_3d.py +++ b/mmdet3d/datasets/pipelines/transforms_3d.py @@ -381,7 +381,7 @@ def __call__(self, input_dict): gt_bboxes_3d = input_dict['gt_bboxes_3d'] points = input_dict['points'] - # TODO: check this inplace function + # TODO: this is inplace operation numpy_box = gt_bboxes_3d.tensor.numpy() numpy_points = points.tensor.numpy() @@ -576,6 +576,7 @@ def _rot_bbox_points(self, input_dict): if len(input_dict['bbox3d_fields']) == 0: rot_mat_T = input_dict['points'].rotate(noise_rotation) input_dict['pcd_rotation'] = rot_mat_T + input_dict['pcd_rotation_angle'] = noise_rotation return # rotate points with bboxes @@ -585,6 +586,7 @@ def _rot_bbox_points(self, input_dict): noise_rotation, input_dict['points']) input_dict['points'] = points input_dict['pcd_rotation'] = rot_mat_T + input_dict['pcd_rotation_angle'] = noise_rotation def _scale_bbox_points(self, input_dict): """Private function to scale bounding boxes and points. diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index 105f39c9b..27eb3af72 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -493,7 +493,6 @@ def convert_valid_bboxes(self, box_dict, info): scores = box_dict['scores_3d'] labels = box_dict['labels_3d'] sample_idx = info['image']['image_idx'] - # TODO: remove the hack of yaw box_preds.limit_yaw(offset=0.5, period=np.pi * 2) if len(box_preds) == 0: diff --git a/mmdet3d/models/dense_heads/anchor3d_head.py b/mmdet3d/models/dense_heads/anchor3d_head.py index 59c79129a..d5be4735c 100644 --- a/mmdet3d/models/dense_heads/anchor3d_head.py +++ b/mmdet3d/models/dense_heads/anchor3d_head.py @@ -50,15 +50,15 @@ def __init__(self, type='Anchor3DRangeGenerator', range=[0, -39.68, -1.78, 69.12, 39.68, -1.78], strides=[2], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], custom_values=[], reshape_out=False), assigner_per_size=False, assign_per_class=False, diff_rad_by_sin=True, - dir_offset=0, - dir_limit_offset=1, + dir_offset=-np.pi / 2, + dir_limit_offset=0, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_cls=dict( type='CrossEntropyLoss', diff --git a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py index 357e37038..65c4e66e6 100644 --- a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py +++ b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py @@ -78,6 +78,7 @@ def __init__( use_direction_classifier=True, diff_rad_by_sin=True, dir_offset=0, + dir_limit_offset=0, loss_cls=dict( type='FocalLoss', use_sigmoid=True, diff --git a/mmdet3d/models/dense_heads/fcos_mono3d_head.py b/mmdet3d/models/dense_heads/fcos_mono3d_head.py index acaa536f1..d3c56d9bb 100644 --- a/mmdet3d/models/dense_heads/fcos_mono3d_head.py +++ b/mmdet3d/models/dense_heads/fcos_mono3d_head.py @@ -216,6 +216,7 @@ def add_sin_difference(boxes1, boxes2): @staticmethod def get_direction_target(reg_targets, dir_offset=0, + dir_limit_offset=0, num_bins=2, one_hot=True): """Encode direction to 0 ~ num_bins-1. @@ -230,7 +231,8 @@ def get_direction_target(reg_targets, torch.Tensor: Encoded direction targets. 
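A numeric sketch of the binning that get_direction_target performs, using the new anchor-head defaults dir_offset=-pi/2 and dir_limit_offset=0 (the yaw values are illustrative):

import numpy as np
import torch
from mmdet3d.core.bbox.structures.utils import limit_period

yaw = torch.tensor([0.3, np.pi + 0.3])   # two headings half a turn apart
dir_offset, dir_limit_offset, num_bins = -np.pi / 2, 0, 2
offset_rot = limit_period(yaw - dir_offset, dir_limit_offset, 2 * np.pi)
dir_cls = torch.clamp(
    torch.floor(offset_rot / (2 * np.pi / num_bins)).long(),
    min=0, max=num_bins - 1)
# dir_cls == tensor([0, 1]): opposite headings land in opposite bins.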
""" rot_gt = reg_targets[..., 6] - offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, + 2 * np.pi) dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) @@ -376,7 +378,10 @@ def loss(self, if self.use_direction_classifier: pos_dir_cls_targets = self.get_direction_target( - pos_bbox_targets_3d, self.dir_offset, one_hot=False) + pos_bbox_targets_3d, + self.dir_offset, + self.dir_limit_offset, + one_hot=False) if self.diff_rad_by_sin: pos_bbox_preds, pos_bbox_targets_3d = self.add_sin_difference( diff --git a/mmdet3d/models/dense_heads/free_anchor3d_head.py b/mmdet3d/models/dense_heads/free_anchor3d_head.py index 76cad122b..925220ec1 100644 --- a/mmdet3d/models/dense_heads/free_anchor3d_head.py +++ b/mmdet3d/models/dense_heads/free_anchor3d_head.py @@ -194,6 +194,7 @@ def loss(self, matched_anchors, matched_object_targets, self.dir_offset, + self.dir_limit_offset, one_hot=False) loss_dir = self.loss_dir( dir_cls_preds_[matched].transpose(-2, -1), diff --git a/mmdet3d/models/dense_heads/groupfree3d_head.py b/mmdet3d/models/dense_heads/groupfree3d_head.py index 87fd9598a..a49822873 100644 --- a/mmdet3d/models/dense_heads/groupfree3d_head.py +++ b/mmdet3d/models/dense_heads/groupfree3d_head.py @@ -950,7 +950,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes(points) + box_indices = bbox.points_in_boxes_batch(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) diff --git a/mmdet3d/models/dense_heads/parta2_rpn_head.py b/mmdet3d/models/dense_heads/parta2_rpn_head.py index 24492aec6..418a161f5 100644 --- a/mmdet3d/models/dense_heads/parta2_rpn_head.py +++ b/mmdet3d/models/dense_heads/parta2_rpn_head.py @@ -59,15 +59,15 @@ def __init__(self, type='Anchor3DRangeGenerator', range=[0, -39.68, -1.78, 69.12, 39.68, -1.78], strides=[2], - sizes=[[1.6, 3.9, 1.56]], + sizes=[[3.9, 1.6, 1.56]], rotations=[0, 1.57], custom_values=[], reshape_out=False), assigner_per_size=False, assign_per_class=False, diff_rad_by_sin=True, - dir_offset=0, - dir_limit_offset=1, + dir_offset=-np.pi / 2, + dir_limit_offset=0, bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), loss_cls=dict( type='CrossEntropyLoss', diff --git a/mmdet3d/models/dense_heads/ssd_3d_head.py b/mmdet3d/models/dense_heads/ssd_3d_head.py index aea287939..c5feee107 100644 --- a/mmdet3d/models/dense_heads/ssd_3d_head.py +++ b/mmdet3d/models/dense_heads/ssd_3d_head.py @@ -1,4 +1,3 @@ -import numpy as np import torch from mmcv.ops.nms import batched_nms from mmcv.runner import force_fp32 @@ -463,9 +462,7 @@ def get_bboxes(self, points, bbox_preds, input_metas, rescale=False): bbox_selected, score_selected, labels = self.multiclass_nms_single( obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3], input_metas[b]) - # fix the wrong direction - # To do: remove this ops - bbox_selected[..., 6] += np.pi + bbox = input_metas[b]['box_type_3d']( bbox_selected.clone(), box_dim=bbox_selected.shape[-1], @@ -488,23 +485,14 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, Returns: tuple[torch.Tensor]: Bounding boxes, scores and labels. 
""" - num_bbox = bbox.shape[0] bbox = input_meta['box_type_3d']( bbox.clone(), box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - if isinstance(bbox, LiDARInstance3DBoxes): - box_idx = bbox.points_in_boxes(points) - box_indices = box_idx.new_zeros([num_bbox + 1]) - box_idx[box_idx == -1] = num_bbox - box_indices.scatter_add_(0, box_idx.long(), - box_idx.new_ones(box_idx.shape)) - box_indices = box_indices[:-1] - nonempty_box_mask = box_indices >= 0 - elif isinstance(bbox, DepthInstance3DBoxes): - box_indices = bbox.points_in_boxes(points) + if isinstance(bbox, (LiDARInstance3DBoxes, DepthInstance3DBoxes)): + box_indices = bbox.points_in_boxes_batch(points) nonempty_box_mask = box_indices.T.sum(1) >= 0 else: raise NotImplementedError('Unsupported bbox type!') @@ -559,18 +547,8 @@ def _assign_targets_by_points_inside(self, bboxes_3d, points): tuple[torch.Tensor]: Flags indicating whether each point is inside bbox and the index of box where each point are in. """ - # TODO: align points_in_boxes function in each box_structures - num_bbox = bboxes_3d.tensor.shape[0] - if isinstance(bboxes_3d, LiDARInstance3DBoxes): - assignment = bboxes_3d.points_in_boxes(points).long() - points_mask = assignment.new_zeros( - [assignment.shape[0], num_bbox + 1]) - assignment[assignment == -1] = num_bbox - points_mask.scatter_(1, assignment.unsqueeze(1), 1) - points_mask = points_mask[:, :-1] - assignment[assignment == num_bbox] = num_bbox - 1 - elif isinstance(bboxes_3d, DepthInstance3DBoxes): - points_mask = bboxes_3d.points_in_boxes(points) + if isinstance(bboxes_3d, (LiDARInstance3DBoxes, DepthInstance3DBoxes)): + points_mask = bboxes_3d.points_in_boxes_batch(points) assignment = points_mask.argmax(dim=-1) else: raise NotImplementedError('Unsupported bbox type!') diff --git a/mmdet3d/models/dense_heads/train_mixins.py b/mmdet3d/models/dense_heads/train_mixins.py index f785a9dc0..21ce73847 100644 --- a/mmdet3d/models/dense_heads/train_mixins.py +++ b/mmdet3d/models/dense_heads/train_mixins.py @@ -292,6 +292,7 @@ def anchor_target_single_assigner(self, sampling_result.pos_bboxes, pos_bbox_targets, self.dir_offset, + self.dir_limit_offset, one_hot=False) bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 @@ -317,6 +318,7 @@ def anchor_target_single_assigner(self, def get_direction_target(anchors, reg_targets, dir_offset=0, + dir_limit_offset=0, num_bins=2, one_hot=True): """Encode direction to 0 ~ num_bins-1. @@ -332,7 +334,7 @@ def get_direction_target(anchors, torch.Tensor: Encoded direction targets. 
""" rot_gt = reg_targets[..., 6] + anchors[..., 6] - offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, 2 * np.pi) dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) if one_hot: diff --git a/mmdet3d/models/dense_heads/vote_head.py b/mmdet3d/models/dense_heads/vote_head.py index e03fe4441..13371b1bb 100644 --- a/mmdet3d/models/dense_heads/vote_head.py +++ b/mmdet3d/models/dense_heads/vote_head.py @@ -470,7 +470,7 @@ def get_targets_single(self, vote_target_masks = points.new_zeros([num_points], dtype=torch.long) vote_target_idx = points.new_zeros([num_points], dtype=torch.long) - box_indices_all = gt_bboxes_3d.points_in_boxes(points) + box_indices_all = gt_bboxes_3d.points_in_boxes_batch(points) for i in range(gt_labels_3d.shape[0]): box_indices = box_indices_all[:, i] indices = torch.nonzero( @@ -620,7 +620,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes(points) + box_indices = bbox.points_in_boxes_batch(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) diff --git a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py index fc4bfc3b0..75d40af53 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py @@ -501,7 +501,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes(points) + box_indices = bbox.points_in_boxes_batch(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) diff --git a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py index 79d9cce95..2c2a6c306 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py @@ -343,7 +343,7 @@ def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets, pred_boxes3d[..., 0:3] = rotation_3d_in_axis( pred_boxes3d[..., 0:3].unsqueeze(1), - (pos_rois_rotation + np.pi / 2), + pos_rois_rotation, axis=2).squeeze(1) pred_boxes3d[:, 0:3] += roi_xyz @@ -435,8 +435,7 @@ def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg): pos_gt_bboxes_ct[..., 0:3] -= roi_center pos_gt_bboxes_ct[..., 6] -= roi_ry pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis( - pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), - -(roi_ry + np.pi / 2), + pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry, axis=2).squeeze(1) # flip orientation if rois have opposite orientation @@ -529,8 +528,7 @@ def get_bboxes(self, local_roi_boxes[..., 0:3] = 0 rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred) rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis( - rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2), - axis=2).squeeze(1) + rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1) rcnn_boxes3d[:, 0:3] += roi_xyz # post processing diff --git a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py index 147219a28..4ccc7ebbe 100644 --- a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py +++ 
b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py @@ -354,7 +354,7 @@ def get_targets_single(self, # Generate pts_semantic_mask and pts_instance_mask when they are None if pts_semantic_mask is None or pts_instance_mask is None: - points2box_mask = gt_bboxes_3d.points_in_boxes(points) + points2box_mask = gt_bboxes_3d.points_in_boxes_batch(points) assignment = points2box_mask.argmax(1) background_mask = points2box_mask.max(1)[0] == 0 diff --git a/mmdet3d/ops/iou3d/src/iou3d_kernel.cu b/mmdet3d/ops/iou3d/src/iou3d_kernel.cu index fce3f7882..14f7681bc 100644 --- a/mmdet3d/ops/iou3d/src/iou3d_kernel.cu +++ b/mmdet3d/ops/iou3d/src/iou3d_kernel.cu @@ -61,9 +61,9 @@ __device__ inline int check_in_box2d(const float *box, const Point &p) { angle_sin = sin(-box[4]); // rotate the point in the opposite direction of box float rot_x = - (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x; + (p.x - center_x) * angle_cos - (p.y - center_y) * angle_sin + center_x; float rot_y = - -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y; + (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y; #ifdef DEBUG printf("box: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", box[0], box[1], box[2], box[3], box[4]); @@ -112,9 +112,9 @@ __device__ inline void rotate_around_center(const Point ¢er, const float angle_cos, const float angle_sin, Point &p) { float new_x = - (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x; + (p.x - center.x) * angle_cos - (p.y - center.y) * angle_sin + center.x; float new_y = - -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; + (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; p.set(new_x, new_y); } diff --git a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py index f576fedcc..14e16b992 100644 --- a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py +++ b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py @@ -7,17 +7,17 @@ def points_in_boxes_gpu(points, boxes): """Find points that are in boxes (CUDA) Args: - points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate, - (x, y, z) is the bottom center + num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz] in + LiDAR/DEPTH coordinate, (x, y, z) is the bottom center Returns: box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 """ - assert boxes.shape[0] == points.shape[0], \ + assert points.shape[0] == boxes.shape[0], \ f'Points and boxes should have the same batch size, ' \ - f'got {boxes.shape[0]} and {boxes.shape[0]}' + f'got {points.shape[0]} and {boxes.shape[0]}' assert boxes.shape[2] == 7, \ f'boxes dimension should be 7, ' \ f'got unexpected shape {boxes.shape[2]}' @@ -53,31 +53,35 @@ def points_in_boxes_gpu(points, boxes): def points_in_boxes_cpu(points, boxes): """Find points that are in boxes (CPU) - Note: - Currently, the output of this function is different from that of - points_in_boxes_gpu. - Args: - points (torch.Tensor): [npoints, 3] - boxes (torch.Tensor): [N, 7], in LiDAR coordinate, - (x, y, z) is the bottom center + points (torch.Tensor): [B, M, 3], [x, y, z] in + LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz], + (x, y, z) is the bottom center. 
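A call sketch for the now-batched CPU op (it requires the compiled roiaware_pool3d extension and assumes the existing re-export from mmdet3d.ops.roiaware_pool3d; box values are made up):

import torch
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_cpu

points = torch.rand(1, 128, 3)                            # (B, M, 3)
boxes = torch.tensor([[[0., 0., 0., 4., 2., 1.5, 0.]]])   # (B, T, 7)
mask = points_in_boxes_cpu(points, boxes)                 # (B, M, T) 0/1 mask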
Returns: - point_indices (torch.Tensor): (N, npoints) + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0 """ - # TODO: Refactor this function as a CPU version of points_in_boxes_gpu - assert boxes.shape[1] == 7, \ + assert points.shape[0] == boxes.shape[0], \ + f'Points and boxes should have the same batch size, ' \ + f'got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ f'boxes dimension should be 7, ' \ f'got unexpected shape {boxes.shape[2]}' - assert points.shape[1] == 3, \ + assert points.shape[2] == 3, \ f'points dimension should be 3, ' \ f'got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] - point_indices = points.new_zeros((boxes.shape[0], points.shape[0]), + point_indices = points.new_zeros((batch_size, num_boxes, num_points), dtype=torch.int) - roiaware_pool3d_ext.points_in_boxes_cpu(boxes.float().contiguous(), - points.float().contiguous(), - point_indices) + for b in range(batch_size): + roiaware_pool3d_ext.points_in_boxes_cpu(boxes[b].float().contiguous(), + points[b].float().contiguous(), + point_indices[b]) + point_indices = point_indices.transpose(1, 2) return point_indices @@ -86,9 +90,9 @@ def points_in_boxes_batch(points, boxes): """Find points that are in boxes (CUDA) Args: - points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate, + num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz], (x, y, z) is the bottom center. Returns: diff --git a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp index a26ffb62b..7e5956b67 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp +++ b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp @@ -15,9 +15,7 @@ inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, float &local_x, float &local_y) { - // should rotate pi/2 + alpha to translate LiDAR to local - float rot_angle = rz + M_PI / 2; - float cosa = cos(rot_angle), sina = sin(rot_angle); + float cosa = cos(-rz), sina = sin(-rz); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } @@ -29,13 +27,13 @@ inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; - cz += h / 2.0; // shift to the center since cz in box3d is the bottom center + float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; + cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center - if (fabsf(z - cz) > h / 2.0) return 0; + if (fabsf(z - cz) > dz / 2.0) return 0; lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y); - float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & - (local_y > -w / 2.0) & (local_y < w / 2.0); + float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & + (local_y > -dy / 2.0) & (local_y < dy / 2.0); return in_flag; } diff --git a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu index 896b316e6..4fed2002f 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu +++ b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu @@ -24,9 +24,7 @@ __device__ inline void 
lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y) { - // should rotate pi/2 + alpha to translate LiDAR to local - float rot_angle = rz + M_PI / 2; - float cosa = cos(rot_angle), sina = sin(rot_angle); + float cosa = cos(-rz), sina = sin(-rz); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } @@ -38,13 +36,13 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; - cz += h / 2.0; // shift to the center since cz in box3d is the bottom center + float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; + cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center - if (fabsf(z - cz) > h / 2.0) return 0; + if (fabsf(z - cz) > dz / 2.0) return 0; lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); - float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & - (local_y > -w / 2.0) & (local_y < w / 2.0); + float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & + (local_y > -dy / 2.0) & (local_y < dy / 2.0); return in_flag; } @@ -52,7 +50,7 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *pts, int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -80,7 +78,7 @@ __global__ void points_in_boxes_batch_kernel(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *pts, int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -107,7 +105,7 @@ __global__ void points_in_boxes_batch_kernel(int batch_size, int boxes_num, void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *pts, int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -132,7 +130,7 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, void points_in_boxes_batch_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *pts, int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center, each box params pts: (B, npoints, 3) [x, y, z] in // LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1 cudaError_t err; @@ -155,7 +153,7 @@ void points_in_boxes_batch_launcher(int batch_size, int boxes_num, int pts_num, int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor 
box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -180,7 +178,7 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, int points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is + // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is // the bottom center. params pts: (B, npoints, 3) [x, y, z] in LiDAR // coordinate params boxes_idx_of_points: (B, npoints), default -1 diff --git a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu index 312b35dcb..c1c948e96 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu +++ b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu @@ -17,9 +17,7 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y) { - // should rotate pi/2 + alpha to translate LiDAR to local - float rot_angle = rz + M_PI / 2; - float cosa = cos(rot_angle), sina = sin(rot_angle); + float cosa = cos(-rz), sina = sin(-rz); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } @@ -27,17 +25,17 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y) { // param pt: (x, y, z) - // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the + // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate, cz in the // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; - cz += h / 2.0; // shift to the center since cz in box3d is the bottom center + float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; + cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center - if (fabsf(z - cz) > h / 2.0) return 0; + if (fabsf(z - cz) > dz / 2.0) return 0; lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); - float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & - (local_y > -w / 2.0) & (local_y < w / 2.0); + float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & + (local_y > -dy / 2.0) & (local_y < dy / 2.0); return in_flag; } @@ -45,9 +43,9 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z, const float *rois, const float *pts, int *pts_mask) { - // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate + // params rois: (N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate // params pts: (npoints, 3) [x, y, z] - // params pts_mask: (N, npoints): -1 means point doesnot in this box, + // params pts_mask: (N, npoints): -1 means point does not in this box, // otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.y; @@ -63,14 +61,14 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, pts_mask[0] = -1; if (cur_in_flag > 0) { float local_z = pts[2] - rois[2]; - float w = rois[3], l 
= rois[4], h = rois[5]; + float dx = rois[3], dy = rois[4], dz = rois[5]; - float x_res = l / out_x; - float y_res = w / out_y; - float z_res = h / out_z; + float x_res = dx / out_x; + float y_res = dy / out_y; + float z_res = dz / out_z; - unsigned int x_idx = int((local_x + l / 2) / x_res); - unsigned int y_idx = int((local_y + w / 2) / y_res); + unsigned int x_idx = int((local_x + dx / 2) / x_res); + unsigned int y_idx = int((local_y + dy / 2) / y_res); unsigned int z_idx = int(local_z / z_res); x_idx = min(max(x_idx, 0), out_x - 1); @@ -231,7 +229,7 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method) { - // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate + // params rois: (N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts_feature: (npoints, C) // params argmax: (N, out_x, out_y, out_z, C) diff --git a/setup.cfg b/setup.cfg index c2b6d9893..8b615d27c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,6 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmdet,mmseg,mmdet3d -known_third_party = cv2,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,recommonmark,scannet_utils,scipy,seaborn,shapely,skimage,tensorflow,terminaltables,torch,trimesh,waymo_open_dataset +known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,recommonmark,scannet_utils,scipy,seaborn,shapely,skimage,tensorflow,terminaltables,torch,trimesh,waymo_open_dataset no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/tests/data/kitti/kitti_dbinfos_train.pkl b/tests/data/kitti/kitti_dbinfos_train.pkl index baa56c1a2..d9be817a1 100644 Binary files a/tests/data/kitti/kitti_dbinfos_train.pkl and b/tests/data/kitti/kitti_dbinfos_train.pkl differ diff --git a/tests/data/lyft/lyft_infos.pkl b/tests/data/lyft/lyft_infos.pkl index 5fa2a41d1..84295d134 100644 Binary files a/tests/data/lyft/lyft_infos.pkl and b/tests/data/lyft/lyft_infos.pkl differ diff --git a/tests/data/lyft/lyft_infos_val.pkl b/tests/data/lyft/lyft_infos_val.pkl index 4ee7a7767..30e61d760 100644 Binary files a/tests/data/lyft/lyft_infos_val.pkl and b/tests/data/lyft/lyft_infos_val.pkl differ diff --git a/tests/data/lyft/sample_results.pkl b/tests/data/lyft/sample_results.pkl index c6509dfc4..132baa484 100644 Binary files a/tests/data/lyft/sample_results.pkl and b/tests/data/lyft/sample_results.pkl differ diff --git a/tests/data/sunrgbd/sunrgbd_infos.pkl b/tests/data/sunrgbd/sunrgbd_infos.pkl index 8f98f2fb1..c637abb9e 100644 Binary files a/tests/data/sunrgbd/sunrgbd_infos.pkl and b/tests/data/sunrgbd/sunrgbd_infos.pkl differ diff --git a/tests/data/waymo/kitti_format/waymo_dbinfos_train.pkl b/tests/data/waymo/kitti_format/waymo_dbinfos_train.pkl index 1816e6a94..a88b8703e 100644 Binary files a/tests/data/waymo/kitti_format/waymo_dbinfos_train.pkl and b/tests/data/waymo/kitti_format/waymo_dbinfos_train.pkl differ diff --git a/tests/test_data/test_datasets/test_kitti_dataset.py b/tests/test_data/test_datasets/test_kitti_dataset.py index 2501ac66f..19dd58cf6 100644 --- a/tests/test_data/test_datasets/test_kitti_dataset.py +++ 
b/tests/test_data/test_datasets/test_kitti_dataset.py @@ -1,10 +1,11 @@ +import math import numpy as np import os import pytest import tempfile import torch -from mmdet3d.core.bbox import LiDARInstance3DBoxes +from mmdet3d.core.bbox import LiDARInstance3DBoxes, limit_period from mmdet3d.datasets import KittiDataset @@ -112,6 +113,7 @@ def test_getitem(): type='ObjectSample', db_sampler=dict( data_root='tests/data/kitti/', + # in coordinate system refactor, this test file is modified info_path='tests/data/kitti/kitti_dbinfos_train.pkl', rate=1.0, prepare=dict( @@ -150,8 +152,29 @@ def test_getitem(): gt_bboxes_3d = data['gt_bboxes_3d']._data gt_labels_3d = data['gt_labels_3d']._data expected_gt_bboxes_3d = torch.tensor( - [[9.5081, -5.2269, -1.1370, 0.4915, 1.2288, 1.9353, -2.7136]]) + [[9.5081, -5.2269, -1.1370, 1.2288, 0.4915, 1.9353, 1.9988]]) expected_gt_labels_3d = torch.tensor([0]) + rot_matrix = data['img_metas']._data['pcd_rotation'] + rot_angle = data['img_metas']._data['pcd_rotation_angle'] + horizontal_flip = data['img_metas']._data['pcd_horizontal_flip'] + vertical_flip = data['img_metas']._data['pcd_vertical_flip'] + expected_rot_matrix = torch.tensor([[0.8018, 0.5976, 0.0000], + [-0.5976, 0.8018, 0.0000], + [0.0000, 0.0000, 1.0000]]) + expected_rot_angle = 0.6404654291602163 + noise_angle = 0.20247319 + assert torch.allclose(expected_rot_matrix, rot_matrix, atol=1e-4) + assert math.isclose(expected_rot_angle, rot_angle, abs_tol=1e-4) + assert horizontal_flip is True + assert vertical_flip is False + + # after coord system refactor + expected_gt_bboxes_3d[:, :3] = \ + expected_gt_bboxes_3d[:, :3] @ rot_matrix @ rot_matrix + expected_gt_bboxes_3d[:, -1:] = -np.pi - expected_gt_bboxes_3d[:, -1:] \ + + 2 * rot_angle - 2 * noise_angle + expected_gt_bboxes_3d[:, -1:] = limit_period( + expected_gt_bboxes_3d[:, -1:], period=np.pi * 2) assert points.shape == (780, 4) assert torch.allclose( gt_bboxes_3d.tensor, expected_gt_bboxes_3d, atol=1e-4) @@ -345,9 +368,10 @@ def test_format_results(): pipeline, modality, split = _generate_kitti_dataset_config() kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline, classes, modality) + # coord system refactor boxes_3d = LiDARInstance3DBoxes( torch.tensor( - [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]])) + [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900, -1.5808]])) labels_3d = torch.tensor([ 0, ]) @@ -358,21 +382,23 @@ def test_format_results(): expected_name = np.array(['Pedestrian']) expected_truncated = np.array([0.]) expected_occluded = np.array([0]) - expected_alpha = np.array([-3.3410306]) + # coord sys refactor + expected_alpha = np.array(-3.3410306 + np.pi) expected_bbox = np.array([[710.443, 144.00221, 820.29114, 307.58667]]) expected_dimensions = np.array([[1.2, 1.89, 0.48]]) expected_location = np.array([[1.8399826, 1.4700007, 8.410018]]) - expected_rotation_y = np.array([-3.1315928]) + expected_rotation_y = np.array([0.0100]) expected_score = np.array([0.5]) expected_sample_idx = np.array([0]) assert np.all(result_files[0]['name'] == expected_name) assert np.allclose(result_files[0]['truncated'], expected_truncated) assert np.all(result_files[0]['occluded'] == expected_occluded) - assert np.allclose(result_files[0]['alpha'], expected_alpha) + assert np.allclose(result_files[0]['alpha'], expected_alpha, 1e-3) assert np.allclose(result_files[0]['bbox'], expected_bbox) assert np.allclose(result_files[0]['dimensions'], expected_dimensions) assert np.allclose(result_files[0]['location'], 
expected_location) - assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y) + assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y, + 1e-3) assert np.allclose(result_files[0]['score'], expected_score) assert np.allclose(result_files[0]['sample_idx'], expected_sample_idx) tmp_dir.cleanup() @@ -385,7 +411,7 @@ def test_bbox2result_kitti(): pipeline, classes, modality) boxes_3d = LiDARInstance3DBoxes( torch.tensor( - [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]])) + [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900, -1.5808]])) labels_3d = torch.tensor([ 0, ]) @@ -399,10 +425,11 @@ def test_bbox2result_kitti(): expected_file_path = os.path.join(temp_kitti_result_dir, '000000.txt') expected_name = np.array(['Pedestrian']) expected_dimensions = np.array([1.2000, 1.8900, 0.4800]) - expected_rotation_y = np.array([0.0100]) - np.pi + # coord system refactor (reverse sign) + expected_rotation_y = 0.0100 expected_score = np.array([0.5]) assert np.all(det_annos[0]['name'] == expected_name) - assert np.allclose(det_annos[0]['rotation_y'], expected_rotation_y) + assert np.allclose(det_annos[0]['rotation_y'], expected_rotation_y, 1e-3) assert np.allclose(det_annos[0]['score'], expected_score) assert np.allclose(det_annos[0]['dimensions'], expected_dimensions) assert os.path.exists(expected_file_path) diff --git a/tests/test_data/test_datasets/test_lyft_dataset.py b/tests/test_data/test_datasets/test_lyft_dataset.py index 6dc024930..4bdfd1234 100644 --- a/tests/test_data/test_datasets/test_lyft_dataset.py +++ b/tests/test_data/test_datasets/test_lyft_dataset.py @@ -3,6 +3,7 @@ import tempfile import torch +from mmdet3d.core import limit_period from mmdet3d.datasets import LyftDataset @@ -10,6 +11,7 @@ def test_getitem(): np.random.seed(0) torch.manual_seed(0) root_path = './tests/data/lyft' + # in coordinate system refactor, this test file is modified ann_file = './tests/data/lyft/lyft_infos.pkl' class_names = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 'bicycle', 'pedestrian', 'animal') @@ -48,9 +50,11 @@ def test_getitem(): pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip'] pcd_scale_factor = data['img_metas']._data['pcd_scale_factor'] pcd_rotation = data['img_metas']._data['pcd_rotation'] + pcd_rotation_angle = data['img_metas']._data['pcd_rotation_angle'] sample_idx = data['img_metas']._data['sample_idx'] - pcd_rotation_expected = np.array([[0.99869376, -0.05109515, 0.], - [0.05109515, 0.99869376, 0.], + # coord sys refactor + pcd_rotation_expected = np.array([[0.99869376, 0.05109515, 0.], + [-0.05109515, 0.99869376, 0.], [0., 0., 1.]]) assert pts_filename == \ 'tests/data/lyft/lidar/host-a017_lidar1_1236118886901125926.bin' @@ -81,6 +85,21 @@ def test_getitem(): expected_gt_labels = np.array([0, 4, 7]) original_classes = lyft_dataset.CLASSES + # manually go through pipeline + expected_points[:, :3] = ( + (expected_points[:, :3] * torch.tensor([1, -1, 1])) + @ pcd_rotation_expected @ pcd_rotation_expected) * torch.tensor( + [1, -1, 1]) + expected_gt_bboxes_3d[:, :3] = ( + (expected_gt_bboxes_3d[:, :3] * torch.tensor([1, -1, 1])) + @ pcd_rotation_expected @ pcd_rotation_expected) * torch.tensor( + [1, -1, 1]) + expected_gt_bboxes_3d[:, 3:6] = expected_gt_bboxes_3d[:, [4, 3, 5]] + expected_gt_bboxes_3d[:, 6:] = -expected_gt_bboxes_3d[:, 6:] \ + - np.pi / 2 - pcd_rotation_angle * 2 + expected_gt_bboxes_3d[:, 6:] = limit_period( + expected_gt_bboxes_3d[:, 6:], period=np.pi * 2) + assert 
torch.allclose(points, expected_points, 1e-2) assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3) assert np.all(gt_labels_3d.numpy() == expected_gt_labels) @@ -109,8 +128,10 @@ def test_getitem(): def test_evaluate(): root_path = './tests/data/lyft' + # in coordinate system refactor, this test file is modified ann_file = './tests/data/lyft/lyft_infos_val.pkl' lyft_dataset = LyftDataset(ann_file, None, root_path) + # in coordinate system refactor, this test file is modified results = mmcv.load('./tests/data/lyft/sample_results.pkl') ap_dict = lyft_dataset.evaluate(results, 'bbox') car_precision = ap_dict['pts_bbox_Lyft/car_AP'] @@ -148,11 +169,11 @@ def test_show(): kitti_dataset = LyftDataset(ann_file, None, root_path) boxes_3d = LiDARInstance3DBoxes( torch.tensor( - [[46.1218, -4.6496, -0.9275, 0.5316, 1.4442, 1.7450, 1.1749], - [33.3189, 0.1981, 0.3136, 0.5656, 1.2301, 1.7985, 1.5723], - [46.1366, -4.6404, -0.9510, 0.5162, 1.6501, 1.7540, 1.3778], - [33.2646, 0.2297, 0.3446, 0.5746, 1.3365, 1.7947, 1.5430], - [58.9079, 16.6272, -1.5829, 1.5656, 3.9313, 1.4899, 1.5505]])) + [[46.1218, -4.6496, -0.9275, 1.4442, 0.5316, 1.7450, -2.7457], + [33.3189, 0.1981, 0.3136, 1.2301, 0.5656, 1.7985, 3.1401], + [46.1366, -4.6404, -0.9510, 1.6501, 0.5162, 1.7540, -2.9486], + [33.2646, 0.2297, 0.3446, 1.3365, 0.5746, 1.7947, -3.1138], + [58.9079, 16.6272, -1.5829, 3.9313, 1.5656, 1.4899, -3.1213]])) scores_3d = torch.tensor([0.1815, 0.1663, 0.5792, 0.2194, 0.2780]) labels_3d = torch.tensor([0, 0, 1, 1, 2]) result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d) diff --git a/tests/test_data/test_datasets/test_sunrgbd_dataset.py b/tests/test_data/test_datasets/test_sunrgbd_dataset.py index f4e9f1f21..c1d42a2bf 100644 --- a/tests/test_data/test_datasets/test_sunrgbd_dataset.py +++ b/tests/test_data/test_datasets/test_sunrgbd_dataset.py @@ -7,6 +7,7 @@ def _generate_sunrgbd_dataset_config(): root_path = './tests/data/sunrgbd' + # in coordinate system refactor, this test file is modified ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl' class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub') @@ -119,6 +120,8 @@ def test_getitem(): [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486], [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030], [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]]) + # coord sys refactor (rotation is correct but yaw has to be reversed) + expected_gt_bboxes_3d[:, 6:] = -expected_gt_bboxes_3d[:, 6:] expected_gt_labels = np.array([0, 7, 6]) original_classes = sunrgbd_dataset.CLASSES diff --git a/tests/test_data/test_datasets/test_waymo_dataset.py b/tests/test_data/test_datasets/test_waymo_dataset.py index c76f8dc0c..41dbe3138 100644 --- a/tests/test_data/test_datasets/test_waymo_dataset.py +++ b/tests/test_data/test_datasets/test_waymo_dataset.py @@ -15,6 +15,7 @@ def _generate_waymo_train_dataset_config(): file_client_args = dict(backend='disk') db_sampler = dict( data_root=data_root, + # in coordinate system refactor, this test file is modified info_path=data_root + 'waymo_dbinfos_train.pkl', rate=1.0, prepare=dict( @@ -113,7 +114,7 @@ def test_getitem(): gt_bboxes_3d = data['gt_bboxes_3d']._data gt_labels_3d = data['gt_labels_3d']._data expected_gt_bboxes_3d = torch.tensor( - [[31.4750, -4.5690, 2.1857, 2.3519, 6.0931, 3.1756, -1.2895]]) + [[31.8048, -0.1002, 2.1857, 6.0931, 2.3519, 3.1756, -0.1403]]) expected_gt_labels_3d = torch.tensor([0]) assert points.shape == 
(765, 5) assert torch.allclose( @@ -131,8 +132,8 @@ def test_evaluate(): pipeline, classes, modality) boxes_3d = LiDARInstance3DBoxes( torch.tensor([[ - 6.9684e+01, 3.3335e+01, 4.1465e-02, 2.0100e+00, 4.3600e+00, - 1.4600e+00, -9.0000e-02 + 6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00, + 1.4600e+00, 9.0000e-02 - np.pi / 2 ]])) labels_3d = torch.tensor([0]) scores_3d = torch.tensor([0.5]) @@ -149,8 +150,8 @@ def test_evaluate(): metric = ['waymo'] boxes_3d = LiDARInstance3DBoxes( torch.tensor([[ - 6.9684e+01, 3.3335e+01, 4.1465e-02, 2.0100e+00, 4.3600e+00, - 1.4600e+00, -9.0000e-02 + 6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00, + 1.4600e+00, 9.0000e-02 - np.pi / 2 ]])) labels_3d = torch.tensor([0]) scores_3d = torch.tensor([0.8]) @@ -177,11 +178,11 @@ def test_show(): data_root, ann_file, split=split, modality=modality, pipeline=pipeline) boxes_3d = LiDARInstance3DBoxes( torch.tensor( - [[46.1218, -4.6496, -0.9275, 0.5316, 1.4442, 1.7450, 1.1749], - [33.3189, 0.1981, 0.3136, 0.5656, 1.2301, 1.7985, 1.5723], - [46.1366, -4.6404, -0.9510, 0.5162, 1.6501, 1.7540, 1.3778], - [33.2646, 0.2297, 0.3446, 0.5746, 1.3365, 1.7947, 1.5430], - [58.9079, 16.6272, -1.5829, 1.5656, 3.9313, 1.4899, 1.5505]])) + [[46.1218, -4.6496, -0.9275, 1.4442, 0.5316, 1.7450, 1.1749], + [33.3189, 0.1981, 0.3136, 1.2301, 0.5656, 1.7985, 1.5723], + [46.1366, -4.6404, -0.9510, 1.6501, 0.5162, 1.7540, 1.3778], + [33.2646, 0.2297, 0.3446, 1.3365, 0.5746, 1.7947, 1.5430], + [58.9079, 16.6272, -1.5829, 3.9313, 1.5656, 1.4899, 1.5505]])) scores_3d = torch.tensor([0.1815, 0.1663, 0.5792, 0.2194, 0.2780]) labels_3d = torch.tensor([0, 0, 1, 1, 2]) result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d) @@ -230,8 +231,8 @@ def test_format_results(): pipeline, classes, modality) boxes_3d = LiDARInstance3DBoxes( torch.tensor([[ - 6.9684e+01, 3.3335e+01, 4.1465e-02, 2.0100e+00, 4.3600e+00, - 1.4600e+00, -9.0000e-02 + 6.9684e+01, 3.3335e+01, 4.1465e-02, 4.3600e+00, 2.0100e+00, + 1.4600e+00, 9.0000e-02 - np.pi / 2 ]])) labels_3d = torch.tensor([0]) scores_3d = torch.tensor([0.5]) @@ -251,11 +252,11 @@ def test_format_results(): assert np.all(result_files[0]['name'] == expected_name) assert np.allclose(result_files[0]['truncated'], expected_truncated) assert np.all(result_files[0]['occluded'] == expected_occluded) - assert np.allclose(result_files[0]['alpha'], expected_alpha) - assert np.allclose(result_files[0]['bbox'], expected_bbox) + assert np.allclose(result_files[0]['bbox'], expected_bbox, 1e-3) assert np.allclose(result_files[0]['dimensions'], expected_dimensions) assert np.allclose(result_files[0]['location'], expected_location) assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y) assert np.allclose(result_files[0]['score'], expected_score) assert np.allclose(result_files[0]['sample_idx'], expected_sample_idx) + assert np.allclose(result_files[0]['alpha'], expected_alpha) tmp_dir.cleanup() diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py index 11ca4c4ec..42173cdf7 100644 --- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py +++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py @@ -130,8 +130,12 @@ def test_object_noise(): input_dict = object_noise(input_dict) points = input_dict['points'] gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor - expected_gt_bboxes_3d = torch.tensor( - [[9.1724, -1.7559, -1.3550, 
0.4800, 1.2000, 1.8900, 0.0505]]) + + # coord sys refactor (lidar2cam) + expected_gt_bboxes_3d = torch.tensor([[ + 9.1724, -1.7559, -1.3550, 1.2000, 0.4800, 1.8900, + 0.0505 - float(rots) * 2 - np.pi / 2 + ]]) repr_str = repr(object_noise) expected_repr_str = 'ObjectNoise(num_try=100, ' \ 'translation_std=[0.25, 0.25, 0.25], ' \ @@ -520,11 +524,11 @@ def test_random_flip_3d(): [21.2334, -9.3607, -0.2588, 0.0000], [21.2179, -9.4372, -0.2598, 0.0000]]) expected_gt_bboxes_3d = torch.tensor( - [[38.9229, -18.4417, -1.1459, 0.7100, 1.7600, 1.8600, 5.4068], - [12.7768, -0.5795, -2.2682, 0.5700, 0.9900, 1.7200, 5.6445], - [12.7557, -2.2996, -1.4869, 0.6100, 1.1100, 1.9000, 5.0806], - [10.6677, -0.8064, -1.5435, 0.7900, 0.9600, 1.7900, 2.0560], - [5.0903, -5.1004, -1.2694, 0.7100, 1.7000, 1.8300, 5.0552]]) + [[38.9229, -18.4417, -1.1459, 0.7100, 1.7600, 1.8600, 2.2652], + [12.7768, -0.5795, -2.2682, 0.5700, 0.9900, 1.7200, 2.5029], + [12.7557, -2.2996, -1.4869, 0.6100, 1.1100, 1.9000, 1.9390], + [10.6677, -0.8064, -1.5435, 0.7900, 0.9600, 1.7900, -1.0856], + [5.0903, -5.1004, -1.2694, 0.7100, 1.7000, 1.8300, 1.9136]]) repr_str = repr(random_flip_3d) expected_repr_str = 'RandomFlip3D(sync_2d=True,' \ ' flip_ratio_bev_vertical=1.0)' diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py index 91f87a942..6a63153fd 100644 --- a/tests/test_data/test_pipelines/test_indoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py @@ -315,10 +315,24 @@ def test_sunrgbd_pipeline(): [0.8636, 1.3511, 0.0504, 0.0304], [0.8690, 1.3461, 0.1265, 0.1065], [0.8668, 1.3434, 0.1216, 0.1017]]) + # Depth coordinate system update: only yaw changes since rotation in depth + # is counter-clockwise and yaw angle is clockwise originally + # But heading angles in sunrgbd data also reverses the sign + # and after horizontal flip the sign reverse again + rotation_angle = info['annos']['rotation_y'] expected_gt_bboxes_3d = torch.tensor( - [[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989], - [-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446], - [0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]]) + [[ + -1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, + 1.3989 + 0.047001579467984445 * 2 - 2 * rotation_angle[0] + ], + [ + -2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, + 1.4446 + 0.047001579467984445 * 2 - 2 * rotation_angle[1] + ], + [ + 0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, + 2.9924 + 0.047001579467984445 * 2 - 2 * rotation_angle[2] + ]]).float() expected_gt_labels_3d = np.array([0, 7, 6]) assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3) assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d) diff --git a/tests/test_data/test_pipelines/test_outdoor_pipeline.py b/tests/test_data/test_pipelines/test_outdoor_pipeline.py index 48febd3bf..90037ceff 100644 --- a/tests/test_data/test_pipelines/test_outdoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_outdoor_pipeline.py @@ -37,63 +37,64 @@ def test_outdoor_aug_pipeline(): ] pipeline = Compose(train_pipeline) + # coord sys refactor: reverse sign of yaw gt_bboxes_3d = LiDARInstance3DBoxes( torch.tensor([ [ 2.16902428e+01, -4.06038128e-02, -1.61906636e+00, - 1.65999997e+00, 3.20000005e+00, 1.61000001e+00, -1.53999996e+00 + 1.65999997e+00, 3.20000005e+00, 1.61000001e+00, 1.53999996e+00 ], [ 7.05006886e+00, -6.57459593e+00, -1.60107934e+00, - 2.27999997e+00, 1.27799997e+01, 3.66000009e+00, 1.54999995e+00 + 2.27999997e+00, 
1.27799997e+01, 3.66000009e+00, -1.54999995e+00 ], [ 2.24698811e+01, -6.69203758e+00, -1.50118136e+00, - 2.31999993e+00, 1.47299995e+01, 3.64000010e+00, 1.59000003e+00 + 2.31999993e+00, 1.47299995e+01, 3.64000010e+00, -1.59000003e+00 ], [ 3.48291969e+01, -7.09058380e+00, -1.36622977e+00, - 2.31999993e+00, 1.00400000e+01, 3.60999990e+00, 1.61000001e+00 + 2.31999993e+00, 1.00400000e+01, 3.60999990e+00, -1.61000001e+00 ], [ 4.62394600e+01, -7.75838804e+00, -1.32405007e+00, - 2.33999991e+00, 1.28299999e+01, 3.63000011e+00, 1.63999999e+00 + 2.33999991e+00, 1.28299999e+01, 3.63000011e+00, -1.63999999e+00 ], [ 2.82966995e+01, -5.55755794e-01, -1.30332506e+00, - 1.47000003e+00, 2.23000002e+00, 1.48000002e+00, -1.57000005e+00 + 1.47000003e+00, 2.23000002e+00, 1.48000002e+00, 1.57000005e+00 ], [ 2.66690197e+01, 2.18230209e+01, -1.73605704e+00, - 1.55999994e+00, 3.48000002e+00, 1.39999998e+00, -1.69000006e+00 + 1.55999994e+00, 3.48000002e+00, 1.39999998e+00, 1.69000006e+00 ], [ 3.13197803e+01, 8.16214371e+00, -1.62177873e+00, - 1.74000001e+00, 3.76999998e+00, 1.48000002e+00, 2.78999996e+00 + 1.74000001e+00, 3.76999998e+00, 1.48000002e+00, -2.78999996e+00 ], [ 4.34395561e+01, -1.95209332e+01, -1.20757008e+00, - 1.69000006e+00, 4.09999990e+00, 1.40999997e+00, -1.53999996e+00 + 1.69000006e+00, 4.09999990e+00, 1.40999997e+00, 1.53999996e+00 ], [ 3.29882965e+01, -3.79360509e+00, -1.69245458e+00, - 1.74000001e+00, 4.09000015e+00, 1.49000001e+00, -1.52999997e+00 + 1.74000001e+00, 4.09000015e+00, 1.49000001e+00, 1.52999997e+00 ], [ 3.85469360e+01, 8.35060215e+00, -1.31423414e+00, - 1.59000003e+00, 4.28000021e+00, 1.45000005e+00, 1.73000002e+00 + 1.59000003e+00, 4.28000021e+00, 1.45000005e+00, -1.73000002e+00 ], [ 2.22492104e+01, -1.13536005e+01, -1.38272512e+00, - 1.62000000e+00, 3.55999994e+00, 1.71000004e+00, 2.48000002e+00 + 1.62000000e+00, 3.55999994e+00, 1.71000004e+00, -2.48000002e+00 ], [ 3.36115799e+01, -1.97708054e+01, -4.92827654e-01, - 1.64999998e+00, 3.54999995e+00, 1.79999995e+00, -1.57000005e+00 + 1.64999998e+00, 3.54999995e+00, 1.79999995e+00, 1.57000005e+00 ], [ 9.85029602e+00, -1.51294518e+00, -1.66834795e+00, - 1.59000003e+00, 3.17000008e+00, 1.38999999e+00, -8.39999974e-01 + 1.59000003e+00, 3.17000008e+00, 1.38999999e+00, 8.39999974e-01 ] ], dtype=torch.float32)) @@ -104,23 +105,59 @@ def test_outdoor_aug_pipeline(): bbox3d_fields=[], img_fields=[]) + origin_center = gt_bboxes_3d.tensor[:, :3].clone() + origin_angle = gt_bboxes_3d.tensor[:, 6].clone() + output = pipeline(results) + # manually go through the pipeline + rotation_angle = output['img_metas']._data['pcd_rotation_angle'] + rotation_matrix = output['img_metas']._data['pcd_rotation'] + noise_angle = torch.tensor([ + 0.70853819, -0.19160091, -0.71116999, 0.49571753, -0.12447527, + -0.4690133, -0.34776965, -0.65692282, -0.52442831, -0.01575567, + -0.61849673, 0.6572608, 0.30312288, -0.19182971 + ]) + noise_trans = torch.tensor([[1.7641e+00, 4.0016e-01, 4.8937e-01], + [-1.3065e+00, 1.6581e+00, -5.9082e-02], + [-1.5504e+00, 4.1732e-01, -4.7218e-01], + [-5.2158e-01, -1.1847e+00, 4.8035e-01], + [-8.9637e-01, -1.9627e+00, 7.9241e-01], + [1.3240e-02, -1.2194e-01, 1.6953e-01], + [8.1798e-01, -2.7891e-01, 7.1578e-01], + [-4.1733e-04, 3.7416e-01, 2.0478e-01], + [1.5218e-01, -3.7413e-01, -6.7257e-03], + [-1.9138e+00, -2.2855e+00, -8.0092e-01], + [1.5933e+00, 5.6872e-01, -5.7244e-02], + [-1.8523e+00, -7.1333e-01, -8.8111e-01], + [5.2678e-01, 1.0106e-01, -1.9432e-01], + [-7.2449e-01, -8.0292e-01, -1.1334e-02]]) + angle = -origin_angle - 
noise_angle + torch.tensor(rotation_angle) + angle -= 2 * np.pi * (angle >= np.pi) + angle += 2 * np.pi * (angle < -np.pi) + scale = output['img_metas']._data['pcd_scale_factor'] + expected_tensor = torch.tensor( - [[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216], - [7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629], - [20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513], - [32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498], - [43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146], - [25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141], - [16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153], - [24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970], - [45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886], - [30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241], - [32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186], - [22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928], - [37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862], - [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]]) + [[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414], + [7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041], + [20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850], + [32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562], + [43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754], + [25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170], + [16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404], + [24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170], + [45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499], + [30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265], + [32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882], + [22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372], + [37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233], + [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308]]) + + expected_tensor[:, :3] = (( + (origin_center + noise_trans) * torch.tensor([1, -1, 1])) + @ rotation_matrix) * scale + + expected_tensor = torch.cat([expected_tensor, angle.unsqueeze(-1)], dim=-1) assert torch.allclose( output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3) @@ -207,6 +244,11 @@ def test_outdoor_velocity_aug_pipeline(): bbox3d_fields=[], img_fields=[]) + origin_center = gt_bboxes_3d.tensor[:, :3].clone() + origin_angle = gt_bboxes_3d.tensor[:, 6].clone( + ) # TODO: ObjectNoise modifies tensor!! 
+ origin_velo = gt_bboxes_3d.tensor[:, 7:9].clone() + output = pipeline(results) expected_tensor = torch.tensor( @@ -246,5 +288,21 @@ def test_outdoor_velocity_aug_pipeline(): -4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00, 1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03 ]]) + # coord sys refactor (manually go through pipeline) + rotation_angle = output['img_metas']._data['pcd_rotation_angle'] + rotation_matrix = output['img_metas']._data['pcd_rotation'] + expected_tensor[:, :3] = ((origin_center @ rotation_matrix) * + output['img_metas']._data['pcd_scale_factor'] * + torch.tensor([1, -1, 1]))[[ + 0, 1, 2, 3, 4, 5, 6, 7, 9 + ]] + angle = -origin_angle - rotation_angle + angle -= 2 * np.pi * (angle >= np.pi) + angle += 2 * np.pi * (angle < -np.pi) + expected_tensor[:, 6:7] = angle.unsqueeze(-1)[[0, 1, 2, 3, 4, 5, 6, 7, 9]] + expected_tensor[:, + 7:9] = ((origin_velo @ rotation_matrix[:2, :2]) * + output['img_metas']._data['pcd_scale_factor'] * + torch.tensor([1, -1]))[[0, 1, 2, 3, 4, 5, 6, 7, 9]] assert torch.allclose( output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3) diff --git a/tests/test_models/test_common_modules/test_roiaware_pool3d.py b/tests/test_models/test_common_modules/test_roiaware_pool3d.py index 5b40decf8..c005be6a1 100644 --- a/tests/test_models/test_common_modules/test_roiaware_pool3d.py +++ b/tests/test_models/test_common_modules/test_roiaware_pool3d.py @@ -1,3 +1,4 @@ +import numpy as np import pytest import torch @@ -15,8 +16,8 @@ def test_RoIAwarePool3d(): roiaware_pool3d_avg = RoIAwarePool3d( out_size=4, max_pts_per_voxel=128, mode='avg') rois = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], - [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]], + [[1.0, 2.0, 3.0, 5.0, 4.0, 6.0, -0.3 - np.pi / 2], + [-10.0, 23.0, 16.0, 20.0, 10.0, 20.0, -0.5 - np.pi / 2]], dtype=torch.float32).cuda( ) # boxes (m, 7) with bottom center in lidar coordinate pts = torch.tensor( @@ -63,6 +64,17 @@ def test_points_in_boxes_gpu(): assert point_indices.shape == torch.Size([2, 8]) assert (point_indices == expected_point_indices).all() + boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]], + dtype=torch.float32).cuda() # 30 degrees + pts = torch.tensor( + [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0], + [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]], + dtype=torch.float32).cuda() + point_indices = points_in_boxes_gpu(points=pts, boxes=boxes) + expected_point_indices = torch.tensor([[-1, -1, 0, -1, 0, -1, -1, -1]], + dtype=torch.int32).cuda() + assert (point_indices == expected_point_indices).all() + if torch.cuda.device_count() > 1: pts = pts.to('cuda:1') boxes = boxes.to('cuda:1') @@ -74,23 +86,35 @@ def test_points_in_boxes_gpu(): def test_points_in_boxes_cpu(): boxes = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], - [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]], + [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], + [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]], dtype=torch.float32 ) # boxes (m, 7) with bottom center in lidar coordinate pts = torch.tensor( - [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6], - [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3], - [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9], - [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]], + [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6], + [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3], + [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [ + -16, -18, 9 + ], [-21.3, -52, -5], [0, 0, 0], [6, 
7, 8], [-2, -3, -4]]], dtype=torch.float32) # points (n, 3) in lidar coordinate point_indices = points_in_boxes_cpu(points=pts, boxes=boxes) expected_point_indices = torch.tensor( - [[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0], + [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]], dtype=torch.int32) - assert point_indices.shape == torch.Size([2, 15]) + assert point_indices.shape == torch.Size([1, 15, 2]) + assert (point_indices == expected_point_indices).all() + + boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]], + dtype=torch.float32) # 30 degrees + pts = torch.tensor( + [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0], + [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]], + dtype=torch.float32) + point_indices = points_in_boxes_cpu(points=pts, boxes=boxes) + expected_point_indices = torch.tensor( + [[[0], [0], [1], [0], [1], [0], [0], [0]]], dtype=torch.int32) assert (point_indices == expected_point_indices).all() diff --git a/tests/test_models/test_heads/test_parta2_bbox_head.py b/tests/test_models/test_heads/test_parta2_bbox_head.py index 7ba5e7a5d..27174d30e 100644 --- a/tests/test_models/test_heads/test_parta2_bbox_head.py +++ b/tests/test_models/test_heads/test_parta2_bbox_head.py @@ -75,7 +75,7 @@ def test_loss(): 2.0579e-02, 1.5005e-04, 3.5252e-05, 0.0000e+00, 2.0433e-05, 1.5422e-05 ]) expected_loss_bbox = torch.as_tensor(0.0622) - expected_loss_corner = torch.Tensor([0.1379]) + expected_loss_corner = torch.Tensor([0.1374]) assert torch.allclose(loss['loss_cls'], expected_loss_cls, 1e-3) assert torch.allclose(loss['loss_bbox'], expected_loss_bbox, 1e-3) @@ -200,7 +200,7 @@ def test_get_targets(): ]) expected_bbox_targets = torch.Tensor( - [[0.0805, 0.0130, 0.0047, 0.0542, -0.2252, 0.0299, -0.1495]]) + [[-0.0632, 0.0516, 0.0047, 0.0542, -0.2252, 0.0299, -0.1495]]) expected_pos_gt_bboxes = torch.Tensor( [[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]]) @@ -344,12 +344,11 @@ def test_get_bboxes(): selected_bboxes, selected_scores, selected_label_preds = result_list[0] expected_selected_bboxes = torch.Tensor( - [[56.2170, 25.9074, -1.3610, 1.6025, 3.6730, 1.5128, -0.1179], - [54.6521, 28.8846, -1.9145, 1.6362, 4.0573, 1.5599, -1.7335], - [31.6179, -5.6004, -1.2470, 1.6458, 4.1622, 1.5632, -1.5734]]).cuda() + [[56.0888, 25.6445, -1.3610, 1.6025, 3.6730, 1.5128, -0.1179], + [54.4606, 29.2412, -1.9145, 1.6362, 4.0573, 1.5599, -1.7335], + [31.8887, -5.8574, -1.2470, 1.6458, 4.1622, 1.5632, -1.5734]]).cuda() expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda() expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda() - assert torch.allclose(selected_bboxes.tensor, expected_selected_bboxes, 1e-3) assert torch.allclose(selected_scores, expected_selected_scores, 1e-3) @@ -386,43 +385,43 @@ def test_multi_class_nms(): box_preds = torch.Tensor( [[ 5.6217e+01, 2.5908e+01, -1.3611e+00, 1.6025e+00, 3.6730e+00, - 1.5129e+00, -1.1786e-01 + 1.5129e+00, 1.1786e-01 ], [ 5.4653e+01, 2.8885e+01, -1.9145e+00, 1.6362e+00, 4.0574e+00, - 1.5599e+00, -1.7335e+00 + 1.5599e+00, 1.7335e+00 ], [ 5.5809e+01, 2.5686e+01, -1.4457e+00, 1.5939e+00, 3.8270e+00, - 1.4997e+00, -2.9191e+00 + 1.4997e+00, 2.9191e+00 ], [ 5.6107e+01, 2.6082e+01, -1.3557e+00, 1.5782e+00, 3.7444e+00, - 1.5266e+00, 1.7707e-01 + 1.5266e+00, -1.7707e-01 ], [ 3.1618e+01, -5.6004e+00, -1.2470e+00, 1.6459e+00, 4.1622e+00, - 1.5632e+00, 
-1.5734e+00 + 1.5632e+00, 1.5734e+00 ], [ 3.1605e+01, -5.6342e+00, -1.2467e+00, 1.6474e+00, 4.1519e+00, - 1.5481e+00, -1.6313e+00 + 1.5481e+00, 1.6313e+00 ], [ 5.6211e+01, 2.7294e+01, -1.5350e+00, 1.5422e+00, 3.7733e+00, - 1.5140e+00, 9.5846e-02 + 1.5140e+00, -9.5846e-02 ], [ 5.5907e+01, 2.7155e+01, -1.4712e+00, 1.5416e+00, 3.7611e+00, - 1.5142e+00, -5.2059e-02 + 1.5142e+00, 5.2059e-02 ], [ 5.4000e+01, 3.0585e+01, -1.6874e+00, 1.6495e+00, 4.0376e+00, - 1.5554e+00, -1.7900e+00 + 1.5554e+00, 1.7900e+00 ], [ 5.6007e+01, 2.6300e+01, -1.3945e+00, 1.5716e+00, 3.7064e+00, - 1.4715e+00, -2.9639e+00 + 1.4715e+00, 2.9639e+00 ]]).cuda() input_meta = dict( diff --git a/tests/test_models/test_heads/test_roi_extractors.py b/tests/test_models/test_heads/test_roi_extractors.py index 703cae3ba..1316aa359 100644 --- a/tests/test_models/test_heads/test_roi_extractors.py +++ b/tests/test_models/test_heads/test_roi_extractors.py @@ -1,3 +1,4 @@ +import numpy as np import pytest import torch @@ -20,8 +21,8 @@ def test_single_roiaware_extractor(): dtype=torch.float32).cuda() coordinate = feats.clone() batch_inds = torch.zeros(feats.shape[0]).cuda() - rois = torch.tensor([[0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], - [0, -10.0, 23.0, 16.0, 10, 20, 20, 0.5]], + rois = torch.tensor([[0, 1.0, 2.0, 3.0, 5.0, 4.0, 6.0, -0.3 - np.pi / 2], + [0, -10.0, 23.0, 16.0, 20, 10, 20, -0.5 - np.pi / 2]], dtype=torch.float32).cuda() # test forward pooled_feats = self(feats, coordinate, batch_inds, rois) diff --git a/tests/test_models/test_heads/test_semantic_heads.py b/tests/test_models/test_heads/test_semantic_heads.py index b7ac06900..ac0e13f4a 100644 --- a/tests/test_models/test_heads/test_semantic_heads.py +++ b/tests/test_models/test_heads/test_semantic_heads.py @@ -52,11 +52,11 @@ def test_PointwiseSemanticHead(): gt_bboxes = [ LiDARInstance3DBoxes( torch.tensor( - [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]], + [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, 0.9091]], dtype=torch.float32).cuda()), LiDARInstance3DBoxes( torch.tensor( - [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]], + [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, 2.4056]], dtype=torch.float32).cuda()) ] # batch size is 2 in the unit test diff --git a/tests/test_utils/test_anchors.py b/tests/test_utils/test_anchors.py index eb6172d6d..e052e5ed8 100644 --- a/tests/test_utils/test_anchors.py +++ b/tests/test_utils/test_anchors.py @@ -21,7 +21,7 @@ def test_anchor_3d_range_generator(): [0, -39.68, -0.6, 70.4, 39.68, -0.6], [0, -39.68, -1.78, 70.4, 39.68, -1.78], ], - sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], rotations=[0, 1.57], reshape_out=False) @@ -31,8 +31,8 @@ def test_anchor_3d_range_generator(): '[[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \ '[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \ '[0, -39.68, -1.78, 70.4, 39.68, -1.78]],' \ - '\nscales=[1],\nsizes=[[0.6, 0.8, 1.73], ' \ - '[0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],' \ + '\nscales=[1],\nsizes=[[0.8, 0.6, 1.73], ' \ + '[1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],' \ '\nrotations=[0, 1.57],\nreshape_out=False,' \ '\nsize_per_range=True)' assert repr_str == expected_repr_str @@ -53,8 +53,8 @@ def test_aligned_anchor_generator(): ranges=[[-51.2, -51.2, -1.80, 51.2, 51.2, -1.80]], scales=[1, 2, 4], sizes=[ - [0.8660, 2.5981, 1.], # 1.5/sqrt(3) - [0.5774, 1.7321, 1.], # 1/sqrt(3) + [2.5981, 0.8660, 1.], # 1.5/sqrt(3) + [1.7321, 0.5774, 1.], # 1/sqrt(3) [1., 1., 1.], [0.4, 0.4, 1], ], @@ -70,7 +70,7 
@@ def test_aligned_anchor_generator(): # check base anchors expected_grid_anchors = [ torch.tensor([[ - -51.0000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000, 0.0000, + -51.0000, -51.0000, -1.8000, 2.5981, 0.8660, 1.0000, 0.0000, 0.0000, 0.0000 ], [ @@ -90,20 +90,20 @@ def test_aligned_anchor_generator(): 0.0000, 0.0000, 0.0000 ], [ - -49.4000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000, + -49.4000, -51.0000, -1.8000, 1.7321, 0.5774, 1.0000, 1.5700, 0.0000, 0.0000 ], [ - -49.0000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000, + -49.0000, -51.0000, -1.8000, 1.7321, 0.5774, 1.0000, 0.0000, 0.0000, 0.0000 ], [ - -48.6000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000, + -48.6000, -51.0000, -1.8000, 2.5981, 0.8660, 1.0000, 1.5700, 0.0000, 0.0000 ]], device=device), torch.tensor([[ - -50.8000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000, 0.0000, + -50.8000, -50.8000, -1.8000, 5.1962, 1.7320, 2.0000, 0.0000, 0.0000, 0.0000 ], [ @@ -123,20 +123,20 @@ def test_aligned_anchor_generator(): 0.0000, 0.0000, 0.0000 ], [ - -47.6000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000, + -47.6000, -50.8000, -1.8000, 3.4642, 1.1548, 2.0000, 1.5700, 0.0000, 0.0000 ], [ - -46.8000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000, + -46.8000, -50.8000, -1.8000, 3.4642, 1.1548, 2.0000, 0.0000, 0.0000, 0.0000 ], [ - -46.0000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000, + -46.0000, -50.8000, -1.8000, 5.1962, 1.7320, 2.0000, 1.5700, 0.0000, 0.0000 ]], device=device), torch.tensor([[ - -50.4000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000, 0.0000, + -50.4000, -50.4000, -1.8000, 10.3924, 3.4640, 4.0000, 0.0000, 0.0000, 0.0000 ], [ @@ -156,15 +156,15 @@ def test_aligned_anchor_generator(): 0.0000, 0.0000, 0.0000 ], [ - -44.0000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000, + -44.0000, -50.4000, -1.8000, 6.9284, 2.3096, 4.0000, 1.5700, 0.0000, 0.0000 ], [ - -42.4000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000, + -42.4000, -50.4000, -1.8000, 6.9284, 2.3096, 4.0000, 0.0000, 0.0000, 0.0000 ], [ - -40.8000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000, + -40.8000, -50.4000, -1.8000, 10.3924, 3.4640, 4.0000, 1.5700, 0.0000, 0.0000 ]], device=device) @@ -193,7 +193,7 @@ def test_aligned_anchor_generator_per_cls(): type='AlignedAnchor3DRangeGeneratorPerCls', ranges=[[-100, -100, -1.80, 100, 100, -1.80], [-100, -100, -1.30, 100, 100, -1.30]], - sizes=[[0.63, 1.76, 1.44], [0.96, 2.35, 1.59]], + sizes=[[1.76, 0.63, 1.44], [2.35, 0.96, 1.59]], custom_values=[0, 0], rotations=[0, 1.57], reshape_out=False) @@ -204,20 +204,20 @@ def test_aligned_anchor_generator_per_cls(): # check base anchors expected_grid_anchors = [[ torch.tensor([[ - -99.0000, -99.0000, -1.8000, 0.6300, 1.7600, 1.4400, 0.0000, + -99.0000, -99.0000, -1.8000, 1.7600, 0.6300, 1.4400, 0.0000, 0.0000, 0.0000 ], [ - -99.0000, -99.0000, -1.8000, 0.6300, 1.7600, 1.4400, + -99.0000, -99.0000, -1.8000, 1.7600, 0.6300, 1.4400, 1.5700, 0.0000, 0.0000 ]], device=device), torch.tensor([[ - -98.0000, -98.0000, -1.3000, 0.9600, 2.3500, 1.5900, 0.0000, + -98.0000, -98.0000, -1.3000, 2.3500, 0.9600, 1.5900, 0.0000, 0.0000, 0.0000 ], [ - -98.0000, -98.0000, -1.3000, 0.9600, 2.3500, 1.5900, + -98.0000, -98.0000, -1.3000, 2.3500, 0.9600, 1.5900, 1.5700, 0.0000, 0.0000 ]], device=device) diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py index 8bcf12b46..810921c80 100644 --- a/tests/test_utils/test_box3d.py +++ b/tests/test_utils/test_box3d.py @@ -139,10 +139,15 @@ def test_lidar_boxes3d(): assert torch.allclose(expected_tensor, bottom_center_box.tensor) # Test init with numpy array - 
np_boxes = np.array( - [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48], - [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]], - dtype=np.float32) + np_boxes = np.array([[ + 1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.48 - 0.13603681398218053 * 4 + ], + [ + 8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, + 1.62 - 0.13603681398218053 * 4 + ]], + dtype=np.float32) boxes_1 = LiDARInstance3DBoxes(np_boxes) assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes)) @@ -156,15 +161,15 @@ def test_lidar_boxes3d(): th_boxes = torch.tensor( [[ 28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002, - 1.48000002, -1.57000005 + 1.48000002, -1.57000005 - 0.13603681398218053 * 4 ], [ 26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002, - 1.39999998, -1.69000006 + 1.39999998, -1.69000006 - 0.13603681398218053 * 4 ], [ 31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998, - 1.48000002, 2.78999996 + 1.48000002, 2.78999996 - 0.13603681398218053 * 4 ]], dtype=torch.float32) boxes_2 = LiDARInstance3DBoxes(th_boxes) @@ -175,12 +180,30 @@ def test_lidar_boxes3d(): boxes_1 = boxes_1.to(boxes_2.device) # test box concatenation - expected_tensor = torch.tensor( - [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48], - [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62], - [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57], - [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69], - [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]) + expected_tensor = torch.tensor([[ + 1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.48 - 0.13603681398218053 * 4 + ], + [ + 8.959413, 2.4567227, -1.6357126, 1.54, + 4.01, 1.57, + 1.62 - 0.13603681398218053 * 4 + ], + [ + 28.2967, -0.5557558, -1.303325, 1.47, + 2.23, 1.48, + -1.57 - 0.13603681398218053 * 4 + ], + [ + 26.66902, 21.82302, -1.736057, 1.56, + 3.48, 1.4, + -1.69 - 0.13603681398218053 * 4 + ], + [ + 31.31978, 8.162144, -1.6217787, 1.74, + 3.77, 1.48, + 2.79 - 0.13603681398218053 * 4 + ]]) boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2]) assert torch.allclose(boxes.tensor, expected_tensor) # concatenate empty list @@ -195,11 +218,26 @@ def test_lidar_boxes3d(): [0.6533, -0.5520, -0.5265], [4.5870, 0.5358, -1.4741]]) expected_tensor = torch.tensor( - [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927], - [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927], - [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927], - [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926], - [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]) + [[ + 1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.6615927 - np.pi + 0.13603681398218053 * 4 + ], + [ + 8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, + 1.5215927 - np.pi + 0.13603681398218053 * 4 + ], + [ + 28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, + 4.7115927 - np.pi + 0.13603681398218053 * 4 + ], + [ + 26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, + 4.8315926 - np.pi + 0.13603681398218053 * 4 + ], + [ + 31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, + 0.35159278 - np.pi + 0.13603681398218053 * 4 + ]]) expected_points = torch.tensor([[1.2559, 0.6762, -1.4658], [4.7814, 0.8784, -1.3857], [6.7053, -0.2517, -0.9697], @@ -210,11 +248,26 @@ def test_lidar_boxes3d(): assert torch.allclose(points, expected_points, 1e-3) expected_tensor = torch.tensor( - [[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616], - [-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216], - 
[-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116], - [-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316], - [-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]]) + [[ + -1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, + -1.6616 + np.pi * 2 - 0.13603681398218053 * 4 + ], + [ + -8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, + -1.5216 + np.pi * 2 - 0.13603681398218053 * 4 + ], + [ + -28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, + -4.7116 + np.pi * 2 - 0.13603681398218053 * 4 + ], + [ + -26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, + -4.8316 + np.pi * 2 - 0.13603681398218053 * 4 + ], + [ + -31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, + -0.3516 + np.pi * 2 - 0.13603681398218053 * 4 + ]]) boxes_flip_vert = boxes.clone() points = boxes_flip_vert.flip('vertical', points) expected_points = torch.tensor([[-1.2559, 0.6762, -1.4658], @@ -228,12 +281,27 @@ def test_lidar_boxes3d(): # test box rotation # with input torch.Tensor points and angle expected_tensor = torch.tensor( - [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976], - [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576], - [28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, 4.8476], - [23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, 4.9676], - [29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, 0.4876]]) - points, rot_mat_T = boxes.rotate(0.13603681398218053, points) + [[ + 1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, + 1.7976 - np.pi + 0.13603681398218053 * 2 + ], + [ + 8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, + 1.6576 - np.pi + 0.13603681398218053 * 2 + ], + [ + 28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, + 4.8476 - np.pi + 0.13603681398218053 * 2 + ], + [ + 23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, + 4.9676 - np.pi + 0.13603681398218053 * 2 + ], + [ + 29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, + 0.4876 - np.pi + 0.13603681398218053 * 2 + ]]) + points, rot_mat_T = boxes.rotate(-0.13603681398218053, points) expected_points = torch.tensor([[-1.1526, 0.8403, -1.4658], [-4.6181, 1.5187, -1.3857], [-6.6775, 0.6600, -0.9697], @@ -247,7 +315,7 @@ def test_lidar_boxes3d(): assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) # with input torch.Tensor points and rotation matrix - points, rot_mat_T = boxes.rotate(-0.13603681398218053, points) # back + points, rot_mat_T = boxes.rotate(0.13603681398218053, points) # back rot_mat = np.array([[0.99076125, -0.13561762, 0.], [0.13561762, 0.99076125, 0.], [0., 0., 1.]]) points, rot_mat_T = boxes.rotate(rot_mat, points) @@ -261,7 +329,7 @@ def test_lidar_boxes3d(): [-6.5263, 1.5595, -0.9697], [-0.4809, 0.7073, -0.5265], [-4.5623, 0.7166, -1.4741]]) - points_np, rot_mat_T_np = boxes.rotate(0.13603681398218053, points_np) + points_np, rot_mat_T_np = boxes.rotate(-0.13603681398218053, points_np) expected_points_np = np.array([[-0.8844, 1.1191, -1.4658], [-4.0401, 2.7039, -1.3857], [-6.2545, 2.4302, -0.9697], @@ -275,7 +343,7 @@ def test_lidar_boxes3d(): assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) # with input LiDARPoints and rotation matrix - points_np, rot_mat_T_np = boxes.rotate(-0.13603681398218053, points_np) + points_np, rot_mat_T_np = boxes.rotate(0.13603681398218053, points_np) lidar_points = LiDARPoints(points_np) lidar_points, rot_mat_T_np = boxes.rotate(rot_mat, lidar_points) points_np = lidar_points.tensor.numpy() @@ -286,27 +354,27 @@ def test_lidar_boxes3d(): # test box scaling expected_tensor = torch.tensor([[ 1.0443488, -2.9183323, -1.7599131, 1.7597977, 
3.4089797, 1.6592377, - 1.9336663 + 1.9336663 - np.pi ], [ 8.014273, -4.8007393, -1.6448704, 1.5486219, 4.0324507, 1.57879, - 1.7936664 + 1.7936664 - np.pi ], [ 27.558605, -7.1084175, -1.310622, 1.4782301, 2.242485, 1.488286, - 4.9836664 + 4.9836664 - np.pi ], [ 19.934517, -28.344835, -1.7457767, 1.5687338, 3.4994833, 1.4078381, - 5.1036663 + 5.1036663 - np.pi ], [ 28.130915, -16.369587, -1.6308585, 1.7497417, 3.791107, 1.488286, - 0.6236664 + 0.6236664 - np.pi ]]) boxes.scale(1.00559866335275) assert torch.allclose(boxes.tensor, expected_tensor) @@ -314,27 +382,27 @@ def test_lidar_boxes3d(): # test box translation expected_tensor = torch.tensor([[ 1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377, - 1.9336663 + 1.9336663 - np.pi ], [ 8.098079, -4.9332013, -1.8018866, 1.5486219, 4.0324507, 1.57879, - 1.7936664 + 1.7936664 - np.pi ], [ 27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286, - 4.9836664 + 4.9836664 - np.pi ], [ 20.018322, -28.477297, -1.9027928, 1.5687338, 3.4994833, 1.4078381, - 5.1036663 + 5.1036663 - np.pi ], [ 28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107, 1.488286, - 0.6236664 + 0.6236664 - np.pi ]]) boxes.translate([0.0838056, -0.13246193, -0.15701613]) assert torch.allclose(boxes.tensor, expected_tensor) @@ -355,17 +423,17 @@ def test_lidar_boxes3d(): index_boxes = boxes[2:5] expected_tensor = torch.tensor([[ 27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286, - 4.9836664 + 4.9836664 - np.pi ], [ 20.018322, -28.477297, -1.9027928, 1.5687338, 3.4994833, 1.4078381, - 5.1036663 + 5.1036663 - np.pi ], [ 28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107, 1.488286, - 0.6236664 + 0.6236664 - np.pi ]]) assert len(index_boxes) == 3 assert torch.allclose(index_boxes.tensor, expected_tensor) @@ -373,7 +441,7 @@ def test_lidar_boxes3d(): index_boxes = boxes[2] expected_tensor = torch.tensor([[ 27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286, - 4.9836664 + 4.9836664 - np.pi ]]) assert len(index_boxes) == 1 assert torch.allclose(index_boxes.tensor, expected_tensor) @@ -381,12 +449,12 @@ def test_lidar_boxes3d(): index_boxes = boxes[[2, 4]] expected_tensor = torch.tensor([[ 27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286, - 4.9836664 + 4.9836664 - np.pi ], [ 28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107, 1.488286, - 0.6236664 + 0.6236664 - np.pi ]]) assert len(index_boxes) == 2 assert torch.allclose(index_boxes.tensor, expected_tensor) @@ -407,13 +475,13 @@ def test_lidar_boxes3d(): assert (boxes.tensor[:, 6] >= -np.pi / 2).all() Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR) - expected_tesor = boxes.tensor.clone() - assert torch.allclose(expected_tesor, boxes.tensor) + expected_tensor = boxes.tensor.clone() + assert torch.allclose(expected_tensor, boxes.tensor) boxes.flip() boxes.flip() boxes.limit_yaw() - assert torch.allclose(expected_tesor, boxes.tensor) + assert torch.allclose(expected_tensor, boxes.tensor) # test nearest_bev expected_tensor = torch.tensor([[-0.5763, -3.9307, 2.8326, -2.1709], @@ -421,52 +489,50 @@ def test_lidar_boxes3d(): [26.5212, -7.9800, 28.7637, -6.5018], [18.2686, -29.2617, 21.7681, -27.6929], [27.3398, -18.3976, 29.0896, -14.6065]]) - # the pytorch print loses some precision assert torch.allclose( boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7) - # obtained by the print of the original implementation - expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00], - [2.4093e+00, -4.4784e+00, -2.5769e-01], - [-7.7767e-01, 
-3.2684e+00, -2.5769e-01], - [-7.7767e-01, -3.2684e+00, -1.9169e+00], - [3.0340e+00, -2.8332e+00, -1.9169e+00], - [3.0340e+00, -2.8332e+00, -2.5769e-01], - [-1.5301e-01, -1.6232e+00, -2.5769e-01], - [-1.5301e-01, -1.6232e+00, -1.9169e+00]], - [[9.8933e+00, -6.1340e+00, -1.8019e+00], - [9.8933e+00, -6.1340e+00, -2.2310e-01], - [5.9606e+00, -5.2427e+00, -2.2310e-01], - [5.9606e+00, -5.2427e+00, -1.8019e+00], - [1.0236e+01, -4.6237e+00, -1.8019e+00], - [1.0236e+01, -4.6237e+00, -2.2310e-01], - [6.3029e+00, -3.7324e+00, -2.2310e-01], - [6.3029e+00, -3.7324e+00, -1.8019e+00]], - [[2.8525e+01, -8.2534e+00, -1.4676e+00], - [2.8525e+01, -8.2534e+00, 2.0648e-02], - [2.6364e+01, -7.6525e+00, 2.0648e-02], - [2.6364e+01, -7.6525e+00, -1.4676e+00], - [2.8921e+01, -6.8292e+00, -1.4676e+00], - [2.8921e+01, -6.8292e+00, 2.0648e-02], - [2.6760e+01, -6.2283e+00, 2.0648e-02], - [2.6760e+01, -6.2283e+00, -1.4676e+00]], - [[2.1337e+01, -2.9870e+01, -1.9028e+00], - [2.1337e+01, -2.9870e+01, -4.9495e-01], - [1.8102e+01, -2.8535e+01, -4.9495e-01], - [1.8102e+01, -2.8535e+01, -1.9028e+00], - [2.1935e+01, -2.8420e+01, -1.9028e+00], - [2.1935e+01, -2.8420e+01, -4.9495e-01], - [1.8700e+01, -2.7085e+01, -4.9495e-01], - [1.8700e+01, -2.7085e+01, -1.9028e+00]], - [[2.6398e+01, -1.7530e+01, -1.7879e+00], - [2.6398e+01, -1.7530e+01, -2.9959e-01], - [2.8612e+01, -1.4452e+01, -2.9959e-01], - [2.8612e+01, -1.4452e+01, -1.7879e+00], - [2.7818e+01, -1.8552e+01, -1.7879e+00], - [2.7818e+01, -1.8552e+01, -2.9959e-01], - [3.0032e+01, -1.5474e+01, -2.9959e-01], - [3.0032e+01, -1.5474e+01, -1.7879e+00]]]) - # the pytorch print loses some precision + expected_tensor = torch.tensor([[[-7.7767e-01, -2.8332e+00, -1.9169e+00], + [-7.7767e-01, -2.8332e+00, -2.5769e-01], + [2.4093e+00, -1.6232e+00, -2.5769e-01], + [2.4093e+00, -1.6232e+00, -1.9169e+00], + [-1.5301e-01, -4.4784e+00, -1.9169e+00], + [-1.5301e-01, -4.4784e+00, -2.5769e-01], + [3.0340e+00, -3.2684e+00, -2.5769e-01], + [3.0340e+00, -3.2684e+00, -1.9169e+00]], + [[5.9606e+00, -4.6237e+00, -1.8019e+00], + [5.9606e+00, -4.6237e+00, -2.2310e-01], + [9.8933e+00, -3.7324e+00, -2.2310e-01], + [9.8933e+00, -3.7324e+00, -1.8019e+00], + [6.3029e+00, -6.1340e+00, -1.8019e+00], + [6.3029e+00, -6.1340e+00, -2.2310e-01], + [1.0236e+01, -5.2427e+00, -2.2310e-01], + [1.0236e+01, -5.2427e+00, -1.8019e+00]], + [[2.6364e+01, -6.8292e+00, -1.4676e+00], + [2.6364e+01, -6.8292e+00, 2.0648e-02], + [2.8525e+01, -6.2283e+00, 2.0648e-02], + [2.8525e+01, -6.2283e+00, -1.4676e+00], + [2.6760e+01, -8.2534e+00, -1.4676e+00], + [2.6760e+01, -8.2534e+00, 2.0648e-02], + [2.8921e+01, -7.6525e+00, 2.0648e-02], + [2.8921e+01, -7.6525e+00, -1.4676e+00]], + [[1.8102e+01, -2.8420e+01, -1.9028e+00], + [1.8102e+01, -2.8420e+01, -4.9495e-01], + [2.1337e+01, -2.7085e+01, -4.9495e-01], + [2.1337e+01, -2.7085e+01, -1.9028e+00], + [1.8700e+01, -2.9870e+01, -1.9028e+00], + [1.8700e+01, -2.9870e+01, -4.9495e-01], + [2.1935e+01, -2.8535e+01, -4.9495e-01], + [2.1935e+01, -2.8535e+01, -1.9028e+00]], + [[2.8612e+01, -1.8552e+01, -1.7879e+00], + [2.8612e+01, -1.8552e+01, -2.9959e-01], + [2.6398e+01, -1.5474e+01, -2.9959e-01], + [2.6398e+01, -1.5474e+01, -1.7879e+00], + [3.0032e+01, -1.7530e+01, -1.7879e+00], + [3.0032e+01, -1.7530e+01, -2.9959e-01], + [2.7818e+01, -1.4452e+01, -2.9959e-01], + [2.7818e+01, -1.4452e+01, -1.7879e+00]]]) + assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7) # test new_box @@ -557,26 +623,27 @@ def test_boxes_conversion(): [0.000000e+00, 0.000000e+00, 0.000000e+00, 
1.000000e+00]], dtype=torch.float32) + # coord sys refactor (reverse sign of yaw) expected_tensor = torch.tensor( [[ - 2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00, - 3.20000005e+00, 1.61000001e+00, -1.53999996e+00 + 2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 3.20000005e+00, + 1.65999997e+00, 1.61000001e+00, 1.53999996e+00 - np.pi / 2 ], [ - 7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00, - 1.27799997e+01, 3.66000009e+00, 1.54999995e+00 + 7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 1.27799997e+01, + 2.27999997e+00, 3.66000009e+00, -1.54999995e+00 - np.pi / 2 ], [ - 2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00, - 1.47299995e+01, 3.64000010e+00, 1.59000003e+00 + 2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 1.47299995e+01, + 2.31999993e+00, 3.64000010e+00, -1.59000003e+00 + 3 * np.pi / 2 ], [ - 3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00, - 1.00400000e+01, 3.60999990e+00, 1.61000001e+00 + 3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 1.00400000e+01, + 2.31999993e+00, 3.60999990e+00, -1.61000001e+00 + 3 * np.pi / 2 ], [ - 4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00, - 1.28299999e+01, 3.63000011e+00, 1.63999999e+00 + 4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 1.28299999e+01, + 2.33999991e+00, 3.63000011e+00, -1.63999999e+00 + 3 * np.pi / 2 ]], dtype=torch.float32) @@ -636,10 +703,15 @@ def test_boxes_conversion(): def test_camera_boxes3d(): # Test init with numpy array - np_boxes = np.array( - [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48], - [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]], - dtype=np.float32) + np_boxes = np.array([[ + 1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.48 - 0.13603681398218053 * 4 - 2 * np.pi + ], + [ + 8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, + 1.62 - 0.13603681398218053 * 4 - 2 * np.pi + ]], + dtype=np.float32) boxes_1 = Box3DMode.convert( LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM) @@ -653,15 +725,15 @@ def test_camera_boxes3d(): th_boxes = torch.tensor( [[ 28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002, - 1.48000002, -1.57000005 + 1.48000002, -1.57000005 - 0.13603681398218053 * 4 - 2 * np.pi ], [ 26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002, - 1.39999998, -1.69000006 + 1.39999998, -1.69000006 - 0.13603681398218053 * 4 - 2 * np.pi ], [ 31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998, - 1.48000002, 2.78999996 + 1.48000002, 2.78999996 - 0.13603681398218053 * 4 - 2 * np.pi ]], dtype=torch.float32) cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM) @@ -674,13 +746,26 @@ def test_camera_boxes3d(): # test box concatenation expected_tensor = Box3DMode.convert( - torch.tensor( - [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48], - [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62], - [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57], - [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69], - [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]), - Box3DMode.LIDAR, Box3DMode.CAM) + torch.tensor([[ + 1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.48 - 0.13603681398218053 * 4 - 2 * np.pi + ], + [ + 8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, + 1.62 - 0.13603681398218053 * 4 - 2 * np.pi + ], + [ + 28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, + -1.57 - 0.13603681398218053 * 4 - 2 * np.pi + ], + [ + 26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, + -1.69 - 
0.13603681398218053 * 4 - 2 * np.pi + ], + [ + 31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, + 2.79 - 0.13603681398218053 * 4 - 2 * np.pi + ]]), Box3DMode.LIDAR, Box3DMode.CAM) boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2]) assert torch.allclose(boxes.tensor, expected_tensor) @@ -689,28 +774,60 @@ def test_camera_boxes3d(): [-0.2517, 0.9697, 6.7053], [0.5520, 0.5265, 0.6533], [-0.5358, 1.4741, 4.5870]]) expected_tensor = Box3DMode.convert( - torch.tensor( - [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927], - [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927], - [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927], - [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926], - [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]), - Box3DMode.LIDAR, Box3DMode.CAM) + torch.tensor([[ + 1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, + 1.6615927 + 0.13603681398218053 * 4 - np.pi + ], + [ + 8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, + 1.5215927 + 0.13603681398218053 * 4 - np.pi + ], + [ + 28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, + 4.7115927 + 0.13603681398218053 * 4 - np.pi + ], + [ + 26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, + 4.8315926 + 0.13603681398218053 * 4 - np.pi + ], + [ + 31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, + 0.35159278 + 0.13603681398218053 * 4 - np.pi + ]]), Box3DMode.LIDAR, Box3DMode.CAM) points = boxes.flip('horizontal', points) expected_points = torch.tensor([[-0.6762, 1.4658, 1.2559], [-0.8784, 1.3857, 4.7814], [0.2517, 0.9697, 6.7053], [-0.5520, 0.5265, 0.6533], [0.5358, 1.4741, 4.5870]]) - assert torch.allclose(boxes.tensor, expected_tensor) + + yaw_normalized_tensor = boxes.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points, 1e-3) expected_tensor = torch.tensor( - [[2.5162, 1.7501, -1.7802, 3.3900, 1.6500, 1.7500, -1.6616], - [2.4567, 1.6357, -8.9594, 4.0100, 1.5700, 1.5400, -1.5216], - [-0.5558, 1.3033, -28.2967, 2.2300, 1.4800, 1.4700, -4.7116], - [21.8230, 1.7361, -26.6690, 3.4800, 1.4000, 1.5600, -4.8316], - [8.1621, 1.6218, -31.3198, 3.7700, 1.4800, 1.7400, -0.3516]]) + [[ + 2.5162, 1.7501, -1.7802, 1.7500, 1.6500, 3.3900, + 1.6616 + 0.13603681398218053 * 4 - np.pi / 2 + ], + [ + 2.4567, 1.6357, -8.9594, 1.5400, 1.5700, 4.0100, + 1.5216 + 0.13603681398218053 * 4 - np.pi / 2 + ], + [ + -0.5558, 1.3033, -28.2967, 1.4700, 1.4800, 2.2300, + 4.7116 + 0.13603681398218053 * 4 - np.pi / 2 + ], + [ + 21.8230, 1.7361, -26.6690, 1.5600, 1.4000, 3.4800, + 4.8316 + 0.13603681398218053 * 4 - np.pi / 2 + ], + [ + 8.1621, 1.6218, -31.3198, 1.7400, 1.4800, 3.7700, + 0.3516 + 0.13603681398218053 * 4 - np.pi / 2 + ]]) boxes_flip_vert = boxes.clone() points = boxes_flip_vert.flip('vertical', points) expected_points = torch.tensor([[-0.6762, 1.4658, -1.2559], @@ -718,19 +835,38 @@ def test_camera_boxes3d(): [0.2517, 0.9697, -6.7053], [-0.5520, 0.5265, -0.6533], [0.5358, 1.4741, -4.5870]]) - assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4) + + yaw_normalized_tensor = boxes_flip_vert.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, expected_tensor, 1e-4) assert torch.allclose(points, 
expected_points) # test box rotation # with input torch.Tensor points and angle expected_tensor = Box3DMode.convert( - torch.tensor( - [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976], - [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576], - [28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, 4.8476], - [23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, 4.9676], - [29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, 0.4876]]), - Box3DMode.LIDAR, Box3DMode.CAM) + torch.tensor([[ + 1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, + 1.7976 + 0.13603681398218053 * 2 - np.pi + ], + [ + 8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, + 1.6576 + 0.13603681398218053 * 2 - np.pi + ], + [ + 28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, + 4.8476 + 0.13603681398218053 * 2 - np.pi + ], + [ + 23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, + 4.9676 + 0.13603681398218053 * 2 - np.pi + ], + [ + 29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, + 0.4876 + 0.13603681398218053 * 2 - np.pi + ]]), Box3DMode.LIDAR, Box3DMode.CAM) points, rot_mat_T = boxes.rotate(torch.tensor(0.13603681398218053), points) expected_points = torch.tensor([[-0.8403, 1.4658, -1.1526], [-1.5187, 1.3857, -4.6181], @@ -740,7 +876,12 @@ def test_camera_boxes3d(): expected_rot_mat_T = torch.tensor([[0.9908, 0.0000, -0.1356], [0.0000, 1.0000, 0.0000], [0.1356, 0.0000, 0.9908]]) - assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) + yaw_normalized_tensor = boxes.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points, 1e-3) assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) @@ -750,7 +891,10 @@ def test_camera_boxes3d(): rot_mat = np.array([[0.99076125, 0., -0.13561762], [0., 1., 0.], [0.13561762, 0., 0.99076125]]) points, rot_mat_T = boxes.rotate(rot_mat, points) - assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) + yaw_normalized_tensor = boxes.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points, 1e-3) assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) @@ -787,51 +931,61 @@ def test_camera_boxes3d(): expected_tensor = Box3DMode.convert( torch.tensor([[ 1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377, - 1.9336663 + 1.9336663 - np.pi ], [ 8.014273, -4.8007393, -1.6448704, 1.5486219, - 4.0324507, 1.57879, 1.7936664 + 4.0324507, 1.57879, 1.7936664 - np.pi ], [ 27.558605, -7.1084175, -1.310622, 1.4782301, - 2.242485, 1.488286, 4.9836664 + 2.242485, 1.488286, 4.9836664 - np.pi ], [ 19.934517, -28.344835, -1.7457767, 1.5687338, - 3.4994833, 1.4078381, 5.1036663 + 3.4994833, 1.4078381, 5.1036663 - np.pi ], [ 28.130915, -16.369587, -1.6308585, 1.7497417, - 3.791107, 1.488286, 0.6236664 + 3.791107, 1.488286, 0.6236664 - np.pi ]]), Box3DMode.LIDAR, Box3DMode.CAM) boxes.scale(1.00559866335275) - assert torch.allclose(boxes.tensor, expected_tensor) + yaw_normalized_tensor = boxes.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, 
expected_tensor) # test box translation expected_tensor = Box3DMode.convert( torch.tensor([[ 1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377, - 1.9336663 + 1.9336663 - np.pi ], [ 8.098079, -4.9332013, -1.8018866, 1.5486219, - 4.0324507, 1.57879, 1.7936664 + 4.0324507, 1.57879, 1.7936664 - np.pi ], [ 27.64241, -7.2408795, -1.4676381, 1.4782301, - 2.242485, 1.488286, 4.9836664 + 2.242485, 1.488286, 4.9836664 - np.pi ], [ 20.018322, -28.477297, -1.9027928, 1.5687338, - 3.4994833, 1.4078381, 5.1036663 + 3.4994833, 1.4078381, 5.1036663 - np.pi ], [ 28.21472, -16.502048, -1.7878747, 1.7497417, - 3.791107, 1.488286, 0.6236664 + 3.791107, 1.488286, 0.6236664 - np.pi ]]), Box3DMode.LIDAR, Box3DMode.CAM) boxes.translate(torch.tensor([0.13246193, 0.15701613, 0.0838056])) - assert torch.allclose(boxes.tensor, expected_tensor) + yaw_normalized_tensor = boxes.tensor.clone() + yaw_normalized_tensor[:, -1:] = limit_period( + yaw_normalized_tensor[:, -1:], period=np.pi * 2) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) + assert torch.allclose(yaw_normalized_tensor, expected_tensor) # test bbox in_range_bev expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool) @@ -857,13 +1011,13 @@ def test_camera_boxes3d(): assert (boxes.tensor[:, 6] >= -np.pi / 2).all() Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR) - expected_tesor = boxes.tensor.clone() - assert torch.allclose(expected_tesor, boxes.tensor) + expected_tensor = boxes.tensor.clone() + assert torch.allclose(expected_tensor, boxes.tensor) boxes.flip() boxes.flip() boxes.limit_yaw() - assert torch.allclose(expected_tesor, boxes.tensor) + assert torch.allclose(expected_tensor, boxes.tensor) # test nearest_bev # BEV box in lidar coordinates (x, y) @@ -877,54 +1031,66 @@ def test_camera_boxes3d(): expected_tensor = lidar_expected_tensor.clone() expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]] expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2] - # the pytorch print loses some precision assert torch.allclose( boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7) - # obtained by the print of the original implementation - expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01], - [1.6232e+00, 2.5769e-01, -1.5301e-01], - [1.6232e+00, 1.9169e+00, -1.5301e-01], - [3.2684e+00, 1.9169e+00, -7.7767e-01], - [4.4784e+00, 2.5769e-01, 2.4093e+00], - [2.8332e+00, 2.5769e-01, 3.0340e+00], - [2.8332e+00, 1.9169e+00, 3.0340e+00], - [4.4784e+00, 1.9169e+00, 2.4093e+00]], - [[5.2427e+00, 2.2310e-01, 5.9606e+00], - [3.7324e+00, 2.2310e-01, 6.3029e+00], - [3.7324e+00, 1.8019e+00, 6.3029e+00], - [5.2427e+00, 1.8019e+00, 5.9606e+00], - [6.1340e+00, 2.2310e-01, 9.8933e+00], - [4.6237e+00, 2.2310e-01, 1.0236e+01], - [4.6237e+00, 1.8019e+00, 1.0236e+01], - [6.1340e+00, 1.8019e+00, 9.8933e+00]], - [[7.6525e+00, -2.0648e-02, 2.6364e+01], - [6.2283e+00, -2.0648e-02, 2.6760e+01], - [6.2283e+00, 1.4676e+00, 2.6760e+01], - [7.6525e+00, 1.4676e+00, 2.6364e+01], - [8.2534e+00, -2.0648e-02, 2.8525e+01], - [6.8292e+00, -2.0648e-02, 2.8921e+01], - [6.8292e+00, 1.4676e+00, 2.8921e+01], - [8.2534e+00, 1.4676e+00, 2.8525e+01]], - [[2.8535e+01, 4.9495e-01, 1.8102e+01], - [2.7085e+01, 4.9495e-01, 1.8700e+01], - [2.7085e+01, 1.9028e+00, 1.8700e+01], - [2.8535e+01, 1.9028e+00, 1.8102e+01], - [2.9870e+01, 4.9495e-01, 2.1337e+01], - [2.8420e+01, 4.9495e-01, 2.1935e+01], - [2.8420e+01, 1.9028e+00, 2.1935e+01], - [2.9870e+01, 1.9028e+00, 2.1337e+01]], - [[1.4452e+01, 2.9959e-01, 
2.8612e+01], - [1.5474e+01, 2.9959e-01, 3.0032e+01], - [1.5474e+01, 1.7879e+00, 3.0032e+01], - [1.4452e+01, 1.7879e+00, 2.8612e+01], - [1.7530e+01, 2.9959e-01, 2.6398e+01], - [1.8552e+01, 2.9959e-01, 2.7818e+01], - [1.8552e+01, 1.7879e+00, 2.7818e+01], - [1.7530e+01, 1.7879e+00, 2.6398e+01]]]) - - # the pytorch print loses some precision - assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7) + expected_tensor = torch.tensor([[[2.8332e+00, 2.5769e-01, -7.7767e-01], + [1.6232e+00, 2.5769e-01, 2.4093e+00], + [1.6232e+00, 1.9169e+00, 2.4093e+00], + [2.8332e+00, 1.9169e+00, -7.7767e-01], + [4.4784e+00, 2.5769e-01, -1.5302e-01], + [3.2684e+00, 2.5769e-01, 3.0340e+00], + [3.2684e+00, 1.9169e+00, 3.0340e+00], + [4.4784e+00, 1.9169e+00, -1.5302e-01]], + [[4.6237e+00, 2.2310e-01, 5.9606e+00], + [3.7324e+00, 2.2310e-01, 9.8933e+00], + [3.7324e+00, 1.8019e+00, 9.8933e+00], + [4.6237e+00, 1.8019e+00, 5.9606e+00], + [6.1340e+00, 2.2310e-01, 6.3029e+00], + [5.2427e+00, 2.2310e-01, 1.0236e+01], + [5.2427e+00, 1.8019e+00, 1.0236e+01], + [6.1340e+00, 1.8019e+00, 6.3029e+00]], + [[6.8292e+00, -2.0648e-02, 2.6364e+01], + [6.2283e+00, -2.0648e-02, 2.8525e+01], + [6.2283e+00, 1.4676e+00, 2.8525e+01], + [6.8292e+00, 1.4676e+00, 2.6364e+01], + [8.2534e+00, -2.0648e-02, 2.6760e+01], + [7.6525e+00, -2.0648e-02, 2.8921e+01], + [7.6525e+00, 1.4676e+00, 2.8921e+01], + [8.2534e+00, 1.4676e+00, 2.6760e+01]], + [[2.8420e+01, 4.9495e-01, 1.8102e+01], + [2.7085e+01, 4.9495e-01, 2.1337e+01], + [2.7085e+01, 1.9028e+00, 2.1337e+01], + [2.8420e+01, 1.9028e+00, 1.8102e+01], + [2.9870e+01, 4.9495e-01, 1.8700e+01], + [2.8535e+01, 4.9495e-01, 2.1935e+01], + [2.8535e+01, 1.9028e+00, 2.1935e+01], + [2.9870e+01, 1.9028e+00, 1.8700e+01]], + [[1.4452e+01, 2.9959e-01, 2.7818e+01], + [1.7530e+01, 2.9959e-01, 3.0032e+01], + [1.7530e+01, 1.7879e+00, 3.0032e+01], + [1.4452e+01, 1.7879e+00, 2.7818e+01], + [1.5474e+01, 2.9959e-01, 2.6398e+01], + [1.8552e+01, 2.9959e-01, 2.8612e+01], + [1.8552e+01, 1.7879e+00, 2.8612e+01], + [1.5474e+01, 1.7879e+00, 2.6398e+01]]]) + + assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-3, atol=1e-4) + + th_boxes = torch.tensor( + [[ + 28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002, + 1.48000002, -1.57000005 + ], + [ + 26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002, + 1.39999998, -1.69000006 + ], + [ + 31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998, + 1.48000002, 2.78999996 + ]], + dtype=torch.float32) # test init with a given origin boxes_origin_given = CameraInstance3DBoxes( @@ -947,17 +1113,17 @@ def test_boxes3d_overlaps(): # Test LiDAR boxes 3D overlaps boxes1_tensor = torch.tensor( - [[1.8, -2.5, -1.8, 1.75, 3.39, 1.65, 1.6615927], - [8.9, -2.5, -1.6, 1.54, 4.01, 1.57, 1.5215927], - [28.3, 0.5, -1.3, 1.47, 2.23, 1.48, 4.7115927], - [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, 0.35]], + [[1.8, -2.5, -1.8, 1.75, 3.39, 1.65, -1.6615927], + [8.9, -2.5, -1.6, 1.54, 4.01, 1.57, -1.5215927], + [28.3, 0.5, -1.3, 1.47, 2.23, 1.48, -4.7115927], + [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, -0.35]], device='cuda') boxes1 = LiDARInstance3DBoxes(boxes1_tensor) - boxes2_tensor = torch.tensor([[1.2, -3.0, -1.9, 1.8, 3.4, 1.7, 1.9], - [8.1, -2.9, -1.8, 1.5, 4.1, 1.6, 1.8], - [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, 0.35], - [20.1, -28.5, -1.9, 1.6, 3.5, 1.4, 5.1]], + boxes2_tensor = torch.tensor([[1.2, -3.0, -1.9, 1.8, 3.4, 1.7, -1.9], + [8.1, -2.9, -1.8, 1.5, 4.1, 1.6, -1.8], + [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, -0.35], + [20.1, -28.5, -1.9, 1.6, 3.5, 1.4, 
-5.1]], device='cuda') boxes2 = LiDARInstance3DBoxes(boxes2_tensor) @@ -1100,6 +1266,7 @@ def test_depth_boxes3d(): [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215], [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493], [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]]) + expected_tensor[:, -1:] -= 0.022998953275003075 * 2 points, rot_mat_T = boxes_rot.rotate(-0.022998953275003075, points) expected_points = torch.tensor([[-0.7049, -1.2400, -1.4658, 2.5359], [-0.9881, -4.7599, -1.3857, 0.7167], @@ -1114,10 +1281,13 @@ def test_depth_boxes3d(): assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) # with input torch.Tensor points and rotation matrix - points, rot_mat_T = boxes.rotate(0.022998953275003075, points) # back + points, rot_mat_T = boxes.rotate(-0.022998953275003075, points) # back rot_mat = np.array([[0.99973554, 0.02299693, 0.], [-0.02299693, 0.99973554, 0.], [0., 0., 1.]]) points, rot_mat_T = boxes.rotate(rot_mat, points) + expected_rot_mat_T = torch.tensor([[0.99973554, 0.02299693, 0.0000], + [-0.02299693, 0.99973554, 0.0000], + [0.0000, 0.0000, 1.0000]]) assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points, 1e-3) assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) @@ -1134,27 +1304,64 @@ def test_depth_boxes3d(): [-0.0974, 6.7093, -0.9697, 0.5599], [0.5669, 0.6404, -0.5265, 1.0032], [-0.4302, 4.5981, -1.4741, 0.0556]]) - expected_rot_mat_T_np = np.array([[0.9997, -0.0230, 0.0000], - [0.0230, 0.9997, 0.0000], + expected_rot_mat_T_np = np.array([[0.99973554, -0.02299693, 0.0000], + [0.02299693, 0.99973554, 0.0000], [0.0000, 0.0000, 1.0000]]) expected_tensor = torch.tensor( [[-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585], [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215], [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493], [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]]) + expected_tensor[:, -1:] -= 0.022998953275003075 * 2 assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) assert np.allclose(points_np, expected_points_np, 1e-3) assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) # with input DepthPoints and rotation matrix - points_np, rot_mat_T_np = boxes.rotate(0.022998953275003075, points_np) + points_np, rot_mat_T_np = boxes.rotate(-0.022998953275003075, points_np) depth_points = DepthPoints(points_np, points_dim=4) depth_points, rot_mat_T_np = boxes.rotate(rot_mat, depth_points) points_np = depth_points.tensor.numpy() + expected_rot_mat_T_np = expected_rot_mat_T_np.T assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) assert np.allclose(points_np, expected_points_np, 1e-3) assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + expected_tensor = torch.tensor([[[-2.1217, -3.5105, -0.5570], + [-2.1217, -3.5105, 0.3384], + [-1.8985, -1.3818, 0.3384], + [-1.8985, -1.3818, -0.5570], + [-1.1883, -3.6084, -0.5570], + [-1.1883, -3.6084, 0.3384], + [-0.9651, -1.4796, 0.3384], + [-0.9651, -1.4796, -0.5570]], + [[-2.8519, -3.4460, 0.4426], + [-2.8519, -3.4460, 1.4525], + [-2.7632, -2.9210, 1.4525], + [-2.7632, -2.9210, 0.4426], + [-2.0401, -3.5833, 0.4426], + [-2.0401, -3.5833, 1.4525], + [-1.9513, -3.0582, 1.4525], + [-1.9513, -3.0582, 0.4426]], + [[-2.9755, -2.7971, -0.4321], + [-2.9755, -2.7971, 0.5883], + [-2.9166, -2.1806, 0.5883], + [-2.9166, -2.1806, -0.4321], + [-2.1197, -2.8789, -0.4321], + [-2.1197, -2.8789, 0.5883], + [-2.0608, -2.2624, 0.5883], + [-2.0608, -2.2624, -0.4321]], + [[-2.1217, 
-3.5105, -0.5570], + [-2.1217, -3.5105, 0.3384], + [-1.8985, -1.3818, 0.3384], + [-1.8985, -1.3818, -0.5570], + [-1.1883, -3.6084, -0.5570], + [-1.1883, -3.6084, 0.3384], + [-0.9651, -1.4796, 0.3384], + [-0.9651, -1.4796, -0.5570]]]) + + assert torch.allclose(boxes.corners, expected_tensor, 1e-3) + th_boxes = torch.tensor( [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772], [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]], @@ -1197,11 +1404,11 @@ def test_depth_boxes3d(): [1.5112, -0.0352, 2.8302], [1.5112, 0.8986, 2.8302], [1.5112, 0.8986, 0.9383]]]) - torch.allclose(boxes.corners, expected_tensor) + assert torch.allclose(boxes.corners, expected_tensor, 1e-3) # test points in boxes if torch.cuda.is_available(): - box_idxs_of_pts = boxes.points_in_boxes(points.cuda()) + box_idxs_of_pts = boxes.points_in_boxes_batch(points.cuda()) expected_idxs_of_pts = torch.tensor( [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], device='cuda:0', @@ -1210,8 +1417,8 @@ def test_depth_boxes3d(): # test get_surface_line_center boxes = torch.tensor( - [[0.3294, 1.0359, 0.1171, 1.0822, 1.1247, 1.3721, 0.4916], - [-2.4630, -2.6324, -0.1616, 0.9202, 1.7896, 0.1992, 0.3185]]) + [[0.3294, 1.0359, 0.1171, 1.0822, 1.1247, 1.3721, -0.4916], + [-2.4630, -2.6324, -0.1616, 0.9202, 1.7896, 0.1992, -0.3185]]) boxes = DepthInstance3DBoxes( boxes, box_dim=boxes.shape[-1], with_yaw=True, origin=(0.5, 0.5, 0.5)) surface_center, line_center = boxes.get_surface_line_center() @@ -1259,22 +1466,97 @@ def test_depth_boxes3d(): def test_rotation_3d_in_axis(): + # # clockwise + # points = torch.tensor([[[-0.4599, -0.0471, 0.0000], + # [-0.4599, -0.0471, 1.8433], + # [-0.4599, 0.0471, 1.8433]], + # [[-0.2555, -0.2683, 0.0000], + # [-0.2555, -0.2683, 0.9072], + # [-0.2555, 0.2683, 0.9072]]]) + # rotated = rotation_3d_in_axis( + # points, torch.tensor([-np.pi / 10, np.pi / 10]), + # axis=0, clockwise=True) + # expected_rotated = torch.tensor([[[0.0000, -0.4228, -0.1869], + # [1.8433, -0.4228, -0.1869], + # [1.8433, -0.4519, -0.0973]], + # [[0.0000, -0.3259, -0.1762], + # [0.9072, -0.3259, -0.1762], + # [0.9072, -0.1601, 0.3341]]]) + # assert torch.allclose(rotated, expected_rotated, 1e-3) + + # anti-clockwise with return rotation mat points = torch.tensor([[[-0.4599, -0.0471, 0.0000], - [-0.4599, -0.0471, 1.8433], - [-0.4599, 0.0471, 1.8433]], - [[-0.2555, -0.2683, 0.0000], - [-0.2555, -0.2683, 0.9072], - [-0.2555, 0.2683, 0.9072]]]) - rotated = rotation_3d_in_axis( - points, torch.tensor([-np.pi / 10, np.pi / 10]), axis=0) - expected_rotated = torch.tensor([[[0.0000, -0.4228, -0.1869], - [1.8433, -0.4228, -0.1869], - [1.8433, -0.4519, -0.0973]], - [[0.0000, -0.3259, -0.1762], - [0.9072, -0.3259, -0.1762], - [0.9072, -0.1601, 0.3341]]]) + [-0.4599, -0.0471, 1.8433]]]) + rotated = rotation_3d_in_axis(points, torch.tensor([np.pi / 2]), axis=0) + expected_rotated = torch.tensor([[[-0.4599, 0.0000, -0.0471], + [-0.4599, -1.8433, -0.0471]]]) assert torch.allclose(rotated, expected_rotated, 1e-3) + points = torch.tensor([[[-0.4599, -0.0471, 0.0000], + [-0.4599, -0.0471, 1.8433]]]) + rotated, mat = rotation_3d_in_axis( + points, torch.tensor([np.pi / 2]), axis=0, return_mat=True) + expected_rotated = torch.tensor([[[-0.4599, 0.0000, -0.0471], + [-0.4599, -1.8433, -0.0471]]]) + expected_mat = torch.tensor([[[1, 0, 0], [0, 0, 1], [0, -1, 0]]]).float() + assert torch.allclose(rotated, expected_rotated, atol=1e-6) + assert torch.allclose(mat, expected_mat, atol=1e-6) + + points = torch.tensor([[[-0.4599, -0.0471, 
0.0000], + [-0.4599, -0.0471, 1.8433]], + [[-0.2555, -0.2683, 0.0000], + [-0.2555, -0.2683, 0.9072]]]) + rotated = rotation_3d_in_axis(points, np.pi / 2, axis=0) + expected_rotated = torch.tensor([[[-0.4599, 0.0000, -0.0471], + [-0.4599, -1.8433, -0.0471]], + [[-0.2555, 0.0000, -0.2683], + [-0.2555, -0.9072, -0.2683]]]) + assert torch.allclose(rotated, expected_rotated, atol=1e-3) + + points = np.array([[[-0.4599, -0.0471, 0.0000], [-0.4599, -0.0471, + 1.8433]], + [[-0.2555, -0.2683, 0.0000], + [-0.2555, -0.2683, 0.9072]]]).astype(np.float32) + + rotated = rotation_3d_in_axis(points, np.pi / 2, axis=0) + expected_rotated = np.array([[[-0.4599, 0.0000, -0.0471], + [-0.4599, -1.8433, -0.0471]], + [[-0.2555, 0.0000, -0.2683], + [-0.2555, -0.9072, -0.2683]]]) + assert np.allclose(rotated, expected_rotated, atol=1e-3) + + points = torch.tensor([[[-0.4599, -0.0471, 0.0000], + [-0.4599, -0.0471, 1.8433]], + [[-0.2555, -0.2683, 0.0000], + [-0.2555, -0.2683, 0.9072]]]) + angles = [np.pi / 2, -np.pi / 2] + rotated = rotation_3d_in_axis(points, angles, axis=0) + expected_rotated = np.array([[[-0.4599, 0.0000, -0.0471], + [-0.4599, -1.8433, -0.0471]], + [[-0.2555, 0.0000, 0.2683], + [-0.2555, 0.9072, 0.2683]]]) + assert np.allclose(rotated, expected_rotated, atol=1e-3) + + points = torch.tensor([[[-0.0471, 0.0000], [-0.0471, 1.8433]], + [[-0.2683, 0.0000], [-0.2683, 0.9072]]]) + angles = [np.pi / 2, -np.pi / 2] + rotated = rotation_3d_in_axis(points, angles) + expected_rotated = np.array([[[0.0000, -0.0471], [-1.8433, -0.0471]], + [[0.0000, 0.2683], [0.9072, 0.2683]]]) + assert np.allclose(rotated, expected_rotated, atol=1e-3) + + +def test_rotation_2d(): + angles = np.array([3.14]) + corners = np.array([[[-0.235, -0.49], [-0.235, 0.49], [0.235, 0.49], + [0.235, -0.49]]]) + corners_rotated = rotation_3d_in_axis(corners, angles) + expected_corners = np.array([[[0.2357801, 0.48962511], + [0.2342193, -0.49037365], + [-0.2357801, -0.48962511], + [-0.2342193, 0.49037365]]]) + assert np.allclose(corners_rotated, expected_corners) + def test_limit_period(): torch.manual_seed(0) @@ -1284,6 +1566,11 @@ def test_limit_period(): [0.3074]]) assert torch.allclose(result, expected_result, 1e-3) + val = val.numpy() + result = limit_period(val) + expected_result = expected_result.numpy() + assert np.allclose(result, expected_result, 1e-3) + def test_xywhr2xyxyr(): torch.manual_seed(0) @@ -1323,3 +1610,14 @@ def test_points_cam2img(): [0.6994, 0.7782], [0.5623, 0.6303], [0.4359, 0.6532]]) assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3) + + points = points.numpy() + proj_mat = proj_mat.numpy() + point_2d_res = points_cam2img(points, proj_mat) + expected_point_2d_res = expected_point_2d_res.numpy() + assert np.allclose(point_2d_res, expected_point_2d_res, 1e-3) + + points = torch.from_numpy(points) + point_2d_res = points_cam2img(points, proj_mat) + expected_point_2d_res = torch.from_numpy(expected_point_2d_res) + assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3) diff --git a/tests/test_utils/test_box_np_ops.py b/tests/test_utils/test_box_np_ops.py index 9825d547e..a6beab0b2 100644 --- a/tests/test_utils/test_box_np_ops.py +++ b/tests/test_utils/test_box_np_ops.py @@ -19,7 +19,7 @@ def test_camera_to_lidar(): def test_box_camera_to_lidar(): from mmdet3d.core.bbox.box_np_ops import box_camera_to_lidar - box = np.array([[1.84, 1.47, 8.41, 1.2, 1.89, 0.48, 0.01]]) + box = np.array([[1.84, 1.47, 8.41, 1.2, 1.89, 0.48, -0.01]]) rect = np.array([[0.9999128, 0.01009263, -0.00851193, 0.], 
[-0.01012729, 0.9999406, -0.00403767, 0.], [0.00847068, 0.00412352, 0.9999556, 0.], [0., 0., 0., @@ -29,8 +29,9 @@ def test_box_camera_to_lidar(): [0.9999753, 0.00693114, -0.0011439, -0.3321029], [0., 0., 0., 1.]]) box_lidar = box_camera_to_lidar(box, rect, Trv2c) - expected_box = np.array( - [[8.73138192, -1.85591746, -1.59969933, 0.48, 1.2, 1.89, 0.01]]) + expected_box = np.array([[ + 8.73138192, -1.85591746, -1.59969933, 1.2, 0.48, 1.89, 0.01 - np.pi / 2 + ]]) assert np.allclose(box_lidar, expected_box) @@ -47,22 +48,17 @@ def test_center_to_corner_box2d(): from mmdet3d.core.bbox.box_np_ops import center_to_corner_box2d center = np.array([[9.348705, -3.6271024]]) dims = np.array([[0.47, 0.98]]) - angles = np.array([-3.14]) + angles = np.array([3.14]) corner = center_to_corner_box2d(center, dims, angles) expected_corner = np.array([[[9.584485, -3.1374772], [9.582925, -4.117476], [9.112926, -4.1167274], [9.114486, -3.1367288]]]) assert np.allclose(corner, expected_corner) - -def test_rotation_2d(): - from mmdet3d.core.bbox.box_np_ops import rotation_2d - angles = np.array([-3.14]) - corners = np.array([[[-0.235, -0.49], [-0.235, 0.49], [0.235, 0.49], - [0.235, -0.49]]]) - corners_rotated = rotation_2d(corners, angles) - expected_corners = np.array([[[0.2357801, 0.48962511], - [0.2342193, -0.49037365], - [-0.2357801, -0.48962511], - [-0.2342193, 0.49037365]]]) - assert np.allclose(corners_rotated, expected_corners) + center = np.array([[-0.0, 0.0]]) + dims = np.array([[4.0, 8.0]]) + angles = np.array([-0.785398]) # -45 degrees + corner = center_to_corner_box2d(center, dims, angles) + expected_corner = np.array([[[-4.24264, -1.41421], [1.41421, 4.24264], + [4.24264, 1.41421], [-1.41421, -4.24264]]]) + assert np.allclose(corner, expected_corner) diff --git a/tests/test_utils/test_coord_3d_mode.py b/tests/test_utils/test_coord_3d_mode.py index ea1f2e3a8..123c84103 100644 --- a/tests/test_utils/test_coord_3d_mode.py +++ b/tests/test_utils/test_coord_3d_mode.py @@ -2,7 +2,8 @@ import torch from mmdet3d.core.bbox import (CameraInstance3DBoxes, Coord3DMode, - DepthInstance3DBoxes, LiDARInstance3DBoxes) + DepthInstance3DBoxes, LiDARInstance3DBoxes, + limit_period) from mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints @@ -241,22 +242,31 @@ def test_boxes_conversion(): convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM, Coord3DMode.LIDAR) - expected_tensor = torch.tensor( - [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800], - [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200], - [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700], - [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900], - [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]]) + expected_tensor = torch.tensor([[ + -1.7501, -1.7802, -2.5162, 1.7500, 1.6500, 3.3900, -1.4800 - np.pi / 2 + ], [ + -1.6357, -8.9594, -2.4567, 1.5400, 1.5700, 4.0100, -1.6200 - np.pi / 2 + ], [-1.3033, -28.2967, 0.5558, 1.4700, 1.4800, 2.2300, 1.5700 - np.pi / 2], + [ + -1.7361, -26.6690, -21.8230, 1.5600, + 1.4000, 3.4800, 1.6900 - np.pi / 2 + ], + [ + -1.6218, -31.3198, -8.1621, 1.7400, + 1.4800, 3.7700, -2.7900 - np.pi / 2 + ]]) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3) convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM, Coord3DMode.DEPTH) expected_tensor = torch.tensor( - [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800], - [8.9594, 
1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200], - [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700], - [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900], - [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]]) + [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, -1.4800], + [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, -1.6200], + [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, 1.5700], + [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, 1.6900], + [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, -2.7900]]) assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3) # test LIDAR to CAM and DEPTH @@ -268,22 +278,42 @@ def test_boxes_conversion(): [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]) convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.CAM) - expected_tensor = torch.tensor( - [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800], - [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200], - [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700], - [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900], - [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]]) + expected_tensor = torch.tensor([ + [-2.5162, 1.7501, 1.7802, 1.7500, 1.6500, 3.3900, -1.4800 - np.pi / 2], + [-2.4567, 1.6357, 8.9594, 1.5400, 1.5700, 4.0100, -1.6200 - np.pi / 2], + [0.5558, 1.3033, 28.2967, 1.4700, 1.4800, 2.2300, 1.5700 - np.pi / 2], + [ + -21.8230, 1.7361, 26.6690, 1.5600, 1.4000, 3.4800, + 1.6900 - np.pi / 2 + ], + [ + -8.1621, 1.6218, 31.3198, 1.7400, 1.4800, 3.7700, + -2.7900 - np.pi / 2 + ] + ]) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3) convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.DEPTH) - expected_tensor = torch.tensor( - [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800], - [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200], - [0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700], - [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900], - [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]]) + expected_tensor = torch.tensor([[ + -2.5162, 1.7802, -1.7501, 1.7500, 3.3900, 1.6500, 1.4800 + np.pi / 2 + ], [-2.4567, 8.9594, -1.6357, 1.5400, 4.0100, 1.5700, 1.6200 + np.pi / 2], + [ + 0.5558, 28.2967, -1.3033, 1.4700, + 2.2300, 1.4800, -1.5700 + np.pi / 2 + ], + [ + -21.8230, 26.6690, -1.7361, 1.5600, + 3.4800, 1.4000, -1.6900 + np.pi / 2 + ], + [ + -8.1621, 31.3198, -1.6218, 1.7400, + 3.7700, 1.4800, 2.7900 + np.pi / 2 + ]]) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3) # test DEPTH to CAM and LIDAR @@ -296,19 +326,25 @@ def test_boxes_conversion(): convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH, Coord3DMode.CAM) expected_tensor = torch.tensor( - [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800], - [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200], - [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700], - [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900], - [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]]) + [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, -1.4800], + [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, -1.6200], + [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, 1.5700], + [26.6690, -1.7361, 
-21.8230, 1.5600, 1.4000, 3.4800, 1.6900], + [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, -2.7900]]) assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3) convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH, Coord3DMode.LIDAR) - expected_tensor = torch.tensor( - [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800], - [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200], - [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700], - [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900], - [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]]) + expected_tensor = torch.tensor([[ + 2.5162, -1.7802, -1.7501, 1.7500, 3.3900, 1.6500, 1.4800 - np.pi / 2 + ], [ + 2.4567, -8.9594, -1.6357, 1.5400, 4.0100, 1.5700, 1.6200 - np.pi / 2 + ], [ + -0.5558, -28.2967, -1.3033, 1.4700, 2.2300, 1.4800, -1.5700 - np.pi / 2 + ], [ + 21.8230, -26.6690, -1.7361, 1.5600, 3.4800, 1.4000, -1.6900 - np.pi / 2 + ], [8.1621, -31.3198, -1.6218, 1.7400, 3.7700, 1.4800, + 2.7900 - np.pi / 2]]) + expected_tensor[:, -1:] = limit_period( + expected_tensor[:, -1:], period=np.pi * 2) assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 7493dc83e..00a313018 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -1,6 +1,8 @@ +import numpy as np +import pytest import torch -from mmdet3d.core import draw_heatmap_gaussian +from mmdet3d.core import array_converter, draw_heatmap_gaussian def test_gaussian(): @@ -9,3 +11,169 @@ def test_gaussian(): radius = 2 draw_heatmap_gaussian(heatmap, ct_int, radius) assert torch.isclose(torch.sum(heatmap), torch.tensor(4.3505), atol=1e-3) + + +def test_array_converter(): + # to torch + @array_converter(to_torch=True, apply_to=('array_a', 'array_b')) + def test_func_1(array_a, array_b, container): + container.append(array_a) + container.append(array_b) + return array_a.clone(), array_b.clone() + + np_array_a = np.array([0.0]) + np_array_b = np.array([0.0]) + container = [] + new_array_a, new_array_b = test_func_1(np_array_a, np_array_b, container) + + assert isinstance(new_array_a, np.ndarray) + assert isinstance(new_array_b, np.ndarray) + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + + # one to torch and one not + @array_converter(to_torch=True, apply_to=('array_a', )) + def test_func_2(array_a, array_b): + return torch.cat([array_a, array_b]) + + with pytest.raises(TypeError): + _ = test_func_2(np_array_a, np_array_b) + + # wrong template_arg_name_ + @array_converter( + to_torch=True, apply_to=('array_a', ), template_arg_name_='array_c') + def test_func_3(array_a, array_b): + return torch.cat([array_a, array_b]) + + with pytest.raises(ValueError): + _ = test_func_3(np_array_a, np_array_b) + + # wrong apply_to + @array_converter(to_torch=True, apply_to=('array_a', 'array_c')) + def test_func_4(array_a, array_b): + return torch.cat([array_a, array_b]) + + with pytest.raises(ValueError): + _ = test_func_4(np_array_a, np_array_b) + + # to numpy + @array_converter(to_torch=False, apply_to=('array_a', 'array_b')) + def test_func_5(array_a, array_b, container): + container.append(array_a) + container.append(array_b) + return array_a.copy(), array_b.copy() + + pt_array_a = torch.tensor([0.0]) + pt_array_b = torch.tensor([0.0]) + container = [] + new_array_a, new_array_b = test_func_5(pt_array_a, pt_array_b, container) + + assert 
isinstance(container[0], np.ndarray) + assert isinstance(container[1], np.ndarray) + assert isinstance(new_array_a, torch.Tensor) + assert isinstance(new_array_b, torch.Tensor) + + # apply_to = None + @array_converter(to_torch=False) + def test_func_6(array_a, array_b, container): + container.append(array_a) + container.append(array_b) + return array_a.clone(), array_b.clone() + + container = [] + new_array_a, new_array_b = test_func_6(pt_array_a, pt_array_b, container) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert isinstance(new_array_a, torch.Tensor) + assert isinstance(new_array_b, torch.Tensor) + + # with default arg + @array_converter(to_torch=True, apply_to=('array_a', 'array_b')) + def test_func_7(array_a, container, array_b=np.array([2.])): + container.append(array_a) + container.append(array_b) + return array_a.clone(), array_b.clone() + + container = [] + new_array_a, new_array_b = test_func_7(np_array_a, container) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert isinstance(new_array_a, np.ndarray) + assert isinstance(new_array_b, np.ndarray) + assert np.allclose(new_array_b, np.array([2.]), 1e-3) + + # override default arg + + container = [] + new_array_a, new_array_b = test_func_7(np_array_a, container, + np.array([4.])) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert isinstance(new_array_a, np.ndarray) + assert np.allclose(new_array_b, np.array([4.]), 1e-3) + + # list arg + @array_converter(to_torch=True, apply_to=('array_a', 'array_b')) + def test_func_8(container, array_a, array_b=[2.]): + container.append(array_a) + container.append(array_b) + return array_a.clone(), array_b.clone() + + container = [] + new_array_a, new_array_b = test_func_8(container, [3.]) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert np.allclose(new_array_a, np.array([3.]), 1e-3) + assert np.allclose(new_array_b, np.array([2.]), 1e-3) + + # number arg + @array_converter(to_torch=True, apply_to=('array_a', 'array_b')) + def test_func_9(container, array_a, array_b=1): + container.append(array_a) + container.append(array_b) + return array_a.clone(), array_b.clone() + + container = [] + new_array_a, new_array_b = test_func_9(container, np_array_a) + + assert isinstance(container[0], torch.FloatTensor) + assert isinstance(container[1], torch.FloatTensor) + assert np.allclose(new_array_a, np_array_a, 1e-3) + assert np.allclose(new_array_b, np.array(1.0), 1e-3) + + # feed kwargs + container = [] + kwargs = {'array_a': [5.], 'array_b': [6.]} + new_array_a, new_array_b = test_func_8(container, **kwargs) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert np.allclose(new_array_a, np.array([5.]), 1e-3) + assert np.allclose(new_array_b, np.array([6.]), 1e-3) + + # feed args and kwargs + container = [] + kwargs = {'array_b': [7.]} + args = (container, [8.]) + new_array_a, new_array_b = test_func_8(*args, **kwargs) + + assert isinstance(container[0], torch.Tensor) + assert isinstance(container[1], torch.Tensor) + assert np.allclose(new_array_a, np.array([8.]), 1e-3) + assert np.allclose(new_array_b, np.array([7.]), 1e-3) + + # wrong template arg type + with pytest.raises(TypeError): + new_array_a, new_array_b = test_func_9(container, 3 + 4j) + + with pytest.raises(TypeError): + new_array_a, new_array_b = test_func_9(container, 
{}) + + # invalid template arg list + with pytest.raises(TypeError): + new_array_a, new_array_b = test_func_9(container, + [True, np.array([3.0])]) diff --git a/tools/create_data.py b/tools/create_data.py index b761f3e37..9935d6cfb 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -227,7 +227,7 @@ def waymo_data_prep(root_path, '--out-dir', type=str, default='./data/kitti', - required='False', + required=False, help='name of info pkl') parser.add_argument('--extra-tag', type=str, default='kitti') parser.add_argument( diff --git a/tools/data_converter/kitti_converter.py b/tools/data_converter/kitti_converter.py index 68a96e618..796db9a02 100644 --- a/tools/data_converter/kitti_converter.py +++ b/tools/data_converter/kitti_converter.py @@ -4,7 +4,7 @@ from nuscenes.utils.geometry_utils import view_points from pathlib import Path -from mmdet3d.core.bbox import box_np_ops +from mmdet3d.core.bbox import box_np_ops, points_cam2img from .kitti_data_utils import get_kitti_image_info, get_waymo_image_info from .nuscenes_converter import post_process_coords @@ -470,7 +470,7 @@ def get_2d_boxes(info, occluded, mono3d=True): repro_rec['velo_cam3d'] = -1 # no velocity in KITTI center3d = np.array(loc).reshape([1, 3]) - center2d = box_np_ops.points_cam2img( + center2d = points_cam2img( center3d, camera_intrinsic, with_depth=True) repro_rec['center2d'] = center2d.squeeze().tolist() # normalized center2D + depth diff --git a/tools/data_converter/lyft_converter.py b/tools/data_converter/lyft_converter.py index 7d4517fda..be26f5b8e 100644 --- a/tools/data_converter/lyft_converter.py +++ b/tools/data_converter/lyft_converter.py @@ -190,8 +190,10 @@ def _fill_trainval_infos(lyft, names[i] = LyftDataset.NameMapping[names[i]] names = np.array(names) - # we need to convert rot to SECOND format. - gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1) + # we need to convert box size to + # the format of our lidar coordinate system + # which is dx, dy, dz (corresponding to l, w, h) + gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1) assert len(gt_boxes) == len( annotations), f'{len(gt_boxes)}, {len(annotations)}' info['gt_boxes'] = gt_boxes diff --git a/tools/data_converter/nuscenes_converter.py b/tools/data_converter/nuscenes_converter.py index d7da7306c..35f54f87e 100644 --- a/tools/data_converter/nuscenes_converter.py +++ b/tools/data_converter/nuscenes_converter.py @@ -9,7 +9,7 @@ from shapely.geometry import MultiPoint, box from typing import List, Tuple, Union -from mmdet3d.core.bbox.box_np_ops import points_cam2img +from mmdet3d.core.bbox import points_cam2img from mmdet3d.datasets import NuScenesDataset nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', @@ -248,8 +248,10 @@ def _fill_trainval_infos(nusc, if names[i] in NuScenesDataset.NameMapping: names[i] = NuScenesDataset.NameMapping[names[i]] names = np.array(names) - # we need to convert rot to SECOND format. 
- gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1) + # we need to convert box size to + # the format of our lidar coordinate system + # which is dx, dy, dz (corresponding to l, w, h) + gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1) assert len(gt_boxes) == len( annotations), f'{len(gt_boxes)}, {len(annotations)}' info['gt_boxes'] = gt_boxes diff --git a/tools/data_converter/sunrgbd_data_utils.py b/tools/data_converter/sunrgbd_data_utils.py index 75b14ed38..58e333ee9 100644 --- a/tools/data_converter/sunrgbd_data_utils.py +++ b/tools/data_converter/sunrgbd_data_utils.py @@ -41,18 +41,17 @@ def __init__(self, line): self.ymax = data[2] + data[4] self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) self.centroid = np.array([data[5], data[6], data[7]]) - self.w = data[8] - self.l = data[9] # noqa: E741 - self.h = data[10] + # data[9] is dx (l), data[8] is dy (w), data[10] is dz (h) + # in our depth coordinate system, + # l corresponds to the size along the x axis + self.size = np.array([data[9], data[8], data[10]]) * 2 self.orientation = np.zeros((3, )) self.orientation[0] = data[11] self.orientation[1] = data[12] - self.heading_angle = -1 * np.arctan2(self.orientation[1], - self.orientation[0]) - self.box3d = np.concatenate([ - self.centroid, - np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle]) - ]) + self.heading_angle = np.arctan2(self.orientation[1], + self.orientation[0]) + self.box3d = np.concatenate( + [self.centroid, self.size, self.heading_angle[None]]) class SUNRGBDData(object):
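
Note on the recurring pattern in the converter hunks above: lyft_converter.py and nuscenes_converter.py now store the raw annotation yaw `rots` where the old code stored `-rots - np.pi / 2`, and reorder `dims` to `dims[:, [1, 0, 2]]` so the box size is (dx, dy, dz) = (l, w, h). That implies a fixed relation between a box kept in the old LiDAR convention (w, l, h, yaw_second) and the refactored one. Below is a minimal illustrative sketch of that relation, not part of this patch; the helper name and standalone form are assumptions for the example only.

import numpy as np

def old_lidar_box_to_new(box):
    """Sketch: map (x, y, z, w, l, h, yaw_old) in the pre-refactor LiDAR
    convention to (x, y, z, dx, dy, dz, yaw) in the refactored one.

    Derived from the converter hunks above: the new code stores the raw
    annotation yaw where the old code stored ``-yaw - pi / 2``, and swaps
    the first two size entries so that dx corresponds to l (size along x).
    """
    x, y, z, w, l, h, yaw_old = box
    # sizes: (w, l, h) -> (dx, dy, dz) = (l, w, h)
    # yaw: yaw_new = -yaw_old - pi / 2 (the mapping is its own inverse)
    return np.array([x, y, z, l, w, h, -yaw_old - np.pi / 2])

# Example: a box stored with yaw_second = -r - pi/2 comes back with yaw = r.
# old_lidar_box_to_new(np.array([0., 0., 0., 1.5, 4.0, 1.6, -0.3 - np.pi / 2]))
# -> array([0. , 0. , 0. , 4. , 1.5, 1.6, 0.3])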